diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index b266d9dc09a..cb6609f7678 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -215,10 +215,6 @@ private: /// broadcast them into a vector. VectorParts &getVectorValue(Value *V); - /// Get a uniform vector of constant integers. We use this to get - /// vectors of ones and zeros for the reduction code. - Constant* getUniformVector(unsigned Val, Type* ScalarTy); - /// Generate a shuffle sequence that will reverse the vector Vec. Value *reverseVector(Value *Vec); @@ -325,12 +321,14 @@ public: /// This enum represents the kinds of reductions that we support. enum ReductionKind { - NoReduction, ///< Not a reduction. - IntegerAdd, ///< Sum of numbers. - IntegerMult, ///< Product of numbers. - IntegerOr, ///< Bitwise or logical OR of numbers. - IntegerAnd, ///< Bitwise or logical AND of numbers. - IntegerXor ///< Bitwise or logical XOR of numbers. + RK_NoReduction, ///< Not a reduction. + RK_IntegerAdd, ///< Sum of integers. + RK_IntegerMult, ///< Product of integers. + RK_IntegerOr, ///< Bitwise or logical OR of numbers. + RK_IntegerAnd, ///< Bitwise or logical AND of numbers. + RK_IntegerXor, ///< Bitwise or logical XOR of numbers. + RK_FloatAdd, ///< Sum of floats. + RK_FloatMult ///< Product of floats. }; /// This enum represents the kinds of inductions that we support. @@ -343,8 +341,8 @@ public: /// This POD struct holds information about reduction variables. struct ReductionDescriptor { - ReductionDescriptor() : StartValue(0), LoopExitInstr(0), Kind(NoReduction) { - } + ReductionDescriptor() : StartValue(0), LoopExitInstr(0), + Kind(RK_NoReduction) {} ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K) : StartValue(Start), LoopExitInstr(Exit), Kind(K) {} @@ -790,11 +788,6 @@ InnerLoopVectorizer::getVectorValue(Value *V) { return WidenMap.get(V); } -Constant* -InnerLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) { - return ConstantVector::getSplat(VF, ConstantInt::get(ScalarTy, Val, true)); -} - Value *InnerLoopVectorizer::reverseVector(Value *Vec) { assert(Vec->getType()->isVectorTy() && "Invalid type"); SmallVector ShuffleMask; @@ -1215,20 +1208,26 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { /// This function returns the identity element (or neutral element) for /// the operation K. -static unsigned -getReductionIdentity(LoopVectorizationLegality::ReductionKind K) { +static Constant* +getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) { switch (K) { - case LoopVectorizationLegality::IntegerXor: - case LoopVectorizationLegality::IntegerAdd: - case LoopVectorizationLegality::IntegerOr: + case LoopVectorizationLegality:: RK_IntegerXor: + case LoopVectorizationLegality:: RK_IntegerAdd: + case LoopVectorizationLegality:: RK_IntegerOr: // Adding, Xoring, Oring zero to a number does not change it. - return 0; - case LoopVectorizationLegality::IntegerMult: + return ConstantInt::get(Tp, 0); + case LoopVectorizationLegality:: RK_IntegerMult: // Multiplying a number by 1 does not change it. - return 1; - case LoopVectorizationLegality::IntegerAnd: + return ConstantInt::get(Tp, 1); + case LoopVectorizationLegality:: RK_IntegerAnd: // AND-ing a number with an all-1 value does not change it. - return -1; + return ConstantInt::get(Tp, -1, true); + case LoopVectorizationLegality:: RK_FloatMult: + // Multiplying a number by 1 does not change it. + return ConstantFP::get(Tp, 1.0L); + case LoopVectorizationLegality:: RK_FloatAdd: + // Adding zero to a number does not change it. + return ConstantFP::get(Tp, 0.0L); default: llvm_unreachable("Unknown reduction kind"); } @@ -1329,8 +1328,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Find the reduction identity variable. Zero for addition, or, xor, // one for multiplication, -1 for And. - Constant *Identity = getUniformVector(getReductionIdentity(RdxDesc.Kind), - VecTy->getScalarType()); + Constant *Iden = getReductionIdentity(RdxDesc.Kind, VecTy->getScalarType()); + Constant *Identity = ConstantVector::getSplat(VF, Iden); // This vector is the Identity vector where the first element is the // incoming scalar reduction. @@ -1378,26 +1377,34 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { Value *ReducedPartRdx = RdxParts[0]; for (unsigned part = 1; part < UF; ++part) { switch (RdxDesc.Kind) { - case LoopVectorizationLegality::IntegerAdd: + case LoopVectorizationLegality::RK_IntegerAdd: ReducedPartRdx = Builder.CreateAdd(RdxParts[part], ReducedPartRdx, "add.rdx"); break; - case LoopVectorizationLegality::IntegerMult: + case LoopVectorizationLegality::RK_IntegerMult: ReducedPartRdx = Builder.CreateMul(RdxParts[part], ReducedPartRdx, "mul.rdx"); break; - case LoopVectorizationLegality::IntegerOr: + case LoopVectorizationLegality::RK_IntegerOr: ReducedPartRdx = Builder.CreateOr(RdxParts[part], ReducedPartRdx, "or.rdx"); break; - case LoopVectorizationLegality::IntegerAnd: + case LoopVectorizationLegality::RK_IntegerAnd: ReducedPartRdx = Builder.CreateAnd(RdxParts[part], ReducedPartRdx, "and.rdx"); break; - case LoopVectorizationLegality::IntegerXor: + case LoopVectorizationLegality::RK_IntegerXor: ReducedPartRdx = Builder.CreateXor(RdxParts[part], ReducedPartRdx, "xor.rdx"); break; + case LoopVectorizationLegality::RK_FloatMult: + ReducedPartRdx = + Builder.CreateFMul(RdxParts[part], ReducedPartRdx, "fmul.rdx"); + break; + case LoopVectorizationLegality::RK_FloatAdd: + ReducedPartRdx = + Builder.CreateFAdd(RdxParts[part], ReducedPartRdx, "fadd.rdx"); + break; default: llvm_unreachable("Unknown reduction operation"); } @@ -1428,21 +1435,27 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Emit the operation on the shuffled value. switch (RdxDesc.Kind) { - case LoopVectorizationLegality::IntegerAdd: + case LoopVectorizationLegality::RK_IntegerAdd: TmpVec = Builder.CreateAdd(TmpVec, Shuf, "add.rdx"); break; - case LoopVectorizationLegality::IntegerMult: + case LoopVectorizationLegality::RK_IntegerMult: TmpVec = Builder.CreateMul(TmpVec, Shuf, "mul.rdx"); break; - case LoopVectorizationLegality::IntegerOr: + case LoopVectorizationLegality::RK_IntegerOr: TmpVec = Builder.CreateOr(TmpVec, Shuf, "or.rdx"); break; - case LoopVectorizationLegality::IntegerAnd: + case LoopVectorizationLegality::RK_IntegerAnd: TmpVec = Builder.CreateAnd(TmpVec, Shuf, "and.rdx"); break; - case LoopVectorizationLegality::IntegerXor: + case LoopVectorizationLegality::RK_IntegerXor: TmpVec = Builder.CreateXor(TmpVec, Shuf, "xor.rdx"); break; + case LoopVectorizationLegality::RK_FloatMult: + TmpVec = Builder.CreateFMul(TmpVec, Shuf, "fmul.rdx"); + break; + case LoopVectorizationLegality::RK_FloatAdd: + TmpVec = Builder.CreateFAdd(TmpVec, Shuf, "fadd.rdx"); + break; default: llvm_unreachable("Unknown reduction operation"); } @@ -2074,6 +2087,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Check that this PHI type is allowed. if (!Phi->getType()->isIntegerTy() && + !Phi->getType()->isFloatingPointTy() && !Phi->getType()->isPointerTy()) { DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); return false; @@ -2105,26 +2119,34 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { continue; } - if (AddReductionVar(Phi, IntegerAdd)) { + if (AddReductionVar(Phi, RK_IntegerAdd)) { DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n"); continue; } - if (AddReductionVar(Phi, IntegerMult)) { + if (AddReductionVar(Phi, RK_IntegerMult)) { DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n"); continue; } - if (AddReductionVar(Phi, IntegerOr)) { + if (AddReductionVar(Phi, RK_IntegerOr)) { DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n"); continue; } - if (AddReductionVar(Phi, IntegerAnd)) { + if (AddReductionVar(Phi, RK_IntegerAnd)) { DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n"); continue; } - if (AddReductionVar(Phi, IntegerXor)) { + if (AddReductionVar(Phi, RK_IntegerXor)) { DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n"); continue; } + if (AddReductionVar(Phi, RK_FloatMult)) { + DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n"); + continue; + } + if (AddReductionVar(Phi, RK_FloatAdd)) { + DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n"); + continue; + } DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n"); return false; @@ -2419,6 +2441,8 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // This includes users of the reduction, variables (which form a cycle // which ends in the phi node). Instruction *ExitInstruction = 0; + // Indicates that we found a binary operation in our scan. + bool FoundBinOp = false; // Iter is our iterator. We start with the PHI node and scan for all of the // users of this instruction. All users must be instructions that can be @@ -2436,6 +2460,9 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // Did we reach the initial PHI node already ? bool FoundStartPHI = false; + // Is this a bin op ? + FoundBinOp |= !isa(Iter); + // For each of the *users* of iter. for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end(); it != e; ++it) { @@ -2475,7 +2502,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // Reductions of instructions such as Div, and Sub is only // possible if the LHS is the reduction variable. - if (!U->isCommutative() && U->getOperand(0) != Iter) + if (!U->isCommutative() && !isa(U) && U->getOperand(0) != Iter) return false; Iter = U; @@ -2484,46 +2511,52 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // We found a reduction var if we have reached the original // phi node and we only have a single instruction with out-of-loop // users. - if (FoundStartPHI && ExitInstruction) { + if (FoundStartPHI) { // This instruction is allowed to have out-of-loop users. AllowedExit.insert(ExitInstruction); // Save the description of this reduction variable. ReductionDescriptor RD(RdxStart, ExitInstruction, Kind); Reductions[Phi] = RD; - return true; + // We've ended the cycle. This is a reduction variable if we have an + // outside user and it has a binary op. + return FoundBinOp && ExitInstruction; } - - // If we've reached the start PHI but did not find an outside user then - // this is dead code. Abort. - if (FoundStartPHI) - return false; } } bool LoopVectorizationLegality::isReductionInstr(Instruction *I, ReductionKind Kind) { + bool FP = I->getType()->isFloatingPointTy(); + bool FastMath = (FP && I->isCommutative() && I->isAssociative()); + switch (I->getOpcode()) { default: return false; case Instruction::PHI: + if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd)) + return false; // possibly. return true; case Instruction::Sub: case Instruction::Add: - return Kind == IntegerAdd; + return Kind == RK_IntegerAdd; case Instruction::SDiv: case Instruction::UDiv: case Instruction::Mul: - return Kind == IntegerMult; + return Kind == RK_IntegerMult; case Instruction::And: - return Kind == IntegerAnd; + return Kind == RK_IntegerAnd; case Instruction::Or: - return Kind == IntegerOr; + return Kind == RK_IntegerOr; case Instruction::Xor: - return Kind == IntegerXor; - } + return Kind == RK_IntegerXor; + case Instruction::FMul: + return Kind == RK_FloatMult && FastMath; + case Instruction::FAdd: + return Kind == RK_FloatAdd && FastMath; + } } LoopVectorizationLegality::InductionKind diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll new file mode 100644 index 00000000000..565684cccb9 --- /dev/null +++ b/test/Transforms/LoopVectorize/float-reduction.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" +;CHECK: @foo +;CHECK: fadd <4 x float> +;CHECK: ret +define float @foo(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %sum.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !tbaa !0 + %add = fadd fast float %sum.04, %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 200 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret float %add +} + +!0 = metadata !{metadata !"float", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"}