diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp
index 889705e95fc..65060dc39d2 100644
--- a/lib/IR/Type.cpp
+++ b/lib/IR/Type.cpp
@@ -708,9 +708,10 @@ VectorType::VectorType(Type *ElType, unsigned NumEl)
 VectorType *VectorType::get(Type *elementType, unsigned NumElements) {
   Type *ElementType = const_cast<Type*>(elementType);
   assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
-  assert(isValidElementType(ElementType) &&
-         "Elements of a VectorType must be a primitive type");
-
+  assert(isValidElementType(ElementType) && "Element type of a VectorType must "
+                                            "be an integer, floating point, or "
+                                            "pointer type.");
+
   LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
   VectorType *&Entry = ElementType->getContext().pImpl
     ->VectorTypes[std::make_pair(ElementType, NumElements)];
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 38763beb744..baf97418b56 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -84,6 +84,18 @@ static const unsigned AliasedCheckLimit = 10;
 // This limit is useful for very large basic blocks.
 static const unsigned MaxMemDepDistance = 160;
 
+/// \brief Predicate for the element types that the SLP vectorizer supports.
+///
+/// The most important thing to filter here are types which are invalid in LLVM
+/// vectors. We also filter target specific types which have absolutely no
+/// meaningful vectorization path such as x86_fp80 and ppc_f128. This just
+/// avoids spending time checking the cost model and realizing that they will
+/// be inevitably scalarized.
+static bool isValidElementType(Type *Ty) {
+  return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() &&
+         !Ty->isPPC_FP128Ty();
+}
+
 /// \returns the parent basic block if all of the instructions in \p VL
 /// are in the same block or null otherwise.
 static BasicBlock *getSameBlock(ArrayRef<Value *> VL) {
@@ -1148,7 +1160,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
       Type *SrcTy = VL0->getOperand(0)->getType();
       for (unsigned i = 0; i < VL.size(); ++i) {
         Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
-        if (Ty != SrcTy || Ty->isAggregateType() || Ty->isVectorTy()) {
+        if (Ty != SrcTy || !isValidElementType(Ty)) {
           BS.cancelScheduling(VL);
           newTreeEntry(VL, false);
           DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n");
@@ -3294,7 +3306,7 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
 
     // Check that the pointer points to scalars.
     Type *Ty = SI->getValueOperand()->getType();
-    if (Ty->isAggregateType() || Ty->isVectorTy())
+    if (!isValidElementType(Ty))
       continue;
 
     // Find the base pointer.
@@ -3335,7 +3347,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
 
   for (int i = 0, e = VL.size(); i < e; ++i) {
     Type *Ty = VL[i]->getType();
-    if (Ty->isAggregateType() || Ty->isVectorTy())
+    if (!isValidElementType(Ty))
       return false;
     Instruction *Inst = dyn_cast<Instruction>(VL[i]);
     if (!Inst || Inst->getOpcode() != Opcode0)
@@ -3555,7 +3567,7 @@ public:
       return false;
 
     Type *Ty = B->getType();
-    if (Ty->isVectorTy())
+    if (!isValidElementType(Ty))
      return false;
 
     ReductionOpcode = B->getOpcode();
diff --git a/test/Transforms/SLPVectorizer/X86/bad_types.ll b/test/Transforms/SLPVectorizer/X86/bad_types.ll
new file mode 100644
index 00000000000..38ed18dad2a
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/bad_types.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test1(x86_mmx %a, x86_mmx %b, i64* %ptr) {
+; Ensure we can handle x86_mmx values which are primitive and can be bitcast
+; with integer types but can't be put into a vector.
+;
+; CHECK-LABEL: @test1
+; CHECK: store i64
+; CHECK: store i64
+; CHECK: ret void
+entry:
+  %a.cast = bitcast x86_mmx %a to i64
+  %b.cast = bitcast x86_mmx %b to i64
+  %a.and = and i64 %a.cast, 42
+  %b.and = and i64 %b.cast, 42
+  %gep = getelementptr i64* %ptr, i32 1
+  store i64 %a.and, i64* %ptr
+  store i64 %b.and, i64* %gep
+  ret void
+}
+
+define void @test2(x86_mmx %a, x86_mmx %b) {
+; Same as @test1 but using phi-input vectorization instead of store
+; vectorization.
+;
+; CHECK-LABEL: @test2
+; CHECK: and i64
+; CHECK: and i64
+; CHECK: ret void
+entry:
+  br i1 undef, label %if.then, label %exit
+
+if.then:
+  %a.cast = bitcast x86_mmx %a to i64
+  %b.cast = bitcast x86_mmx %b to i64
+  %a.and = and i64 %a.cast, 42
+  %b.and = and i64 %b.cast, 42
+  br label %exit
+
+exit:
+  %a.phi = phi i64 [ 0, %entry ], [ %a.and, %if.then ]
+  %b.phi = phi i64 [ 0, %entry ], [ %b.and, %if.then ]
+  tail call void @f(i64 %a.phi, i64 %b.phi)
+  ret void
+}
+
+declare void @f(i64, i64)
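A small standalone C++ sketch of the type distinction the patch leans on (illustrative only, not part of the patch; it assumes an LLVM tree of roughly this vintage, where x86_mmx still exists, plus a trivial main() driver): VectorType::isValidElementType() rejects x86_mmx outright, which is what @test1/@test2 in bad_types.ll exercise, while x86_fp80 is a legal vector element type that the new SLP-local predicate excludes purely to avoid wasted cost-model work.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include <cstdio>

int main() {
  llvm::LLVMContext Ctx;
  // x86_mmx is a primitive type that bitcasts to i64, yet it is not a legal
  // vector element type, so the SLP vectorizer must skip values of this type.
  llvm::Type *MMX = llvm::Type::getX86_MMXTy(Ctx);
  // x86_fp80 is a legal element type; the SLP predicate filters it anyway
  // because vectorizing it would inevitably be scalarized.
  llvm::Type *F80 = llvm::Type::getX86_FP80Ty(Ctx);
  llvm::Type *I64 = llvm::Type::getInt64Ty(Ctx);

  std::printf("x86_mmx:  %d\n", llvm::VectorType::isValidElementType(MMX)); // expect 0
  std::printf("x86_fp80: %d\n", llvm::VectorType::isValidElementType(F80)); // expect 1
  std::printf("i64:      %d\n", llvm::VectorType::isValidElementType(I64)); // expect 1
  return 0;
}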