SLPVectorizer: Only vectorize intrinsics whose operands are widened equally

The vectorizer only knows how to vectorize intrinsics by widening all operands by
the same factor.

Patch by Tyler Nowicki!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205855 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Arnold Schwaighofer 2014-04-09 14:20:47 +00:00
parent 0d5d656524
commit b0ee2374ce
4 changed files with 100 additions and 26 deletions

View File

@ -0,0 +1,54 @@
//===- llvm/Transforms/Utils/VectorUtils.h - Vector utilities -*- C++ -*-=====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
namespace llvm {
/// \brief Report whether an intrinsic can be vectorized by plain widening.
///
/// An intrinsic qualifies when its scalar form takes only scalar arguments
/// and its vector form takes only vector arguments.
///
/// \param ID the intrinsic to classify.
/// \returns true if \p ID is one of the intrinsics listed below, false for
/// every other intrinsic ID.
static inline bool isTriviallyVectorizable(Intrinsic::ID ID) {
  bool IsTrivial = false;

  // Case labels are kept in alphabetical order to make lookups easy.
  switch (ID) {
  case Intrinsic::ceil:
  case Intrinsic::copysign:
  case Intrinsic::cos:
  case Intrinsic::ctpop:
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::fabs:
  case Intrinsic::floor:
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::nearbyint:
  case Intrinsic::pow:
  case Intrinsic::rint:
  case Intrinsic::round:
  case Intrinsic::sin:
  case Intrinsic::sqrt:
  case Intrinsic::trunc:
    IsTrivial = true;
    break;
  default:
    // Anything not listed above is rejected.
    break;
  }

  return IsTrivial;
}
} // llvm namespace
#endif

View File

@ -91,6 +91,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/VectorUtils.h"
#include <algorithm>
#include <map>
@ -2266,32 +2267,12 @@ static Intrinsic::ID
getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
// If we have an intrinsic call, check if it is trivially vectorizable.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
switch (II->getIntrinsicID()) {
case Intrinsic::sqrt:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::log:
case Intrinsic::log10:
case Intrinsic::log2:
case Intrinsic::fabs:
case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
return II->getIntrinsicID();
default:
Intrinsic::ID ID = II->getIntrinsicID();
if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
ID == Intrinsic::lifetime_end)
return ID;
else
return Intrinsic::not_intrinsic;
}
}
if (!TLI)

View File

@ -41,6 +41,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/VectorUtils.h"
#include <algorithm>
#include <map>
@ -949,7 +950,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::Call: {
// Check if the calls are all to the same vectorizable intrinsic.
IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]);
if (II==NULL) {
Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;
if (!isTriviallyVectorizable(ID)) {
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;

View File

@ -0,0 +1,36 @@
; RUN: opt < %s -slp-vectorizer -o - -S -slp-threshold=-1000
target datalayout = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx--nvidiacl"
; CTLZ cannot be vectorized currently because the second argument is a scalar
; for both the scalar and vector forms of the intrinsic. In the future it
; should be possible to vectorize such functions.
; Test causes an assert if LLVM tries to vectorize CTLZ.
; Applies @llvm.ctlz.i8 to each of the two lanes of %x and rebuilds a <2 x i8>
; result from the two scalar results. In this variant every ctlz call is
; immediately followed by the insertelement that consumes it.
define <2 x i8> @cltz_test(<2 x i8> %x) #0 {
entry:
; Lane 0: extract, call ctlz, insert into an undef vector.
%0 = extractelement <2 x i8> %x, i32 0
%call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
%vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0
; Lane 1: extract, call ctlz, insert into the partially built vector.
%1 = extractelement <2 x i8> %x, i32 1
%call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false)
%vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1
ret <2 x i8> %vecinit2
}
; Same per-lane ctlz computation as @cltz_test, but with the instructions
; grouped by kind: both extractelements first, then both ctlz calls, then both
; insertelements. This function uses attribute group #1 instead of #0.
define <2 x i8> @cltz_test2(<2 x i8> %x) #1 {
entry:
; Extract both lanes up front.
%0 = extractelement <2 x i8> %x, i32 0
%1 = extractelement <2 x i8> %x, i32 1
; The two scalar ctlz calls are adjacent here.
%call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
%call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false)
; Rebuild the <2 x i8> result from the two scalar results.
%vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0
%vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1
ret <2 x i8> %vecinit2
}
declare i8 @llvm.ctlz.i8(i8, i1) #3
attributes #0 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }