diff --git a/include/llvm/Transforms/Utils/VectorUtils.h b/include/llvm/Transforms/Utils/VectorUtils.h index 44a7149eee9..f796e570184 100644 --- a/include/llvm/Transforms/Utils/VectorUtils.h +++ b/include/llvm/Transforms/Utils/VectorUtils.h @@ -99,7 +99,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { if (IntrinsicInst *II = dyn_cast(CI)) { Intrinsic::ID ID = II->getIntrinsicID(); if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start || - ID == Intrinsic::lifetime_end) + ID == Intrinsic::lifetime_end || ID == Intrinsic::assume) return ID; else return Intrinsic::not_intrinsic; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index d45b8d8a72b..f3bd96c028c 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -55,7 +55,9 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/AssumptionTracker.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" @@ -884,8 +886,12 @@ public: LoopVectorizationLegality *Legal, const TargetTransformInfo &TTI, const DataLayout *DL, const TargetLibraryInfo *TLI, - const Function *F, const LoopVectorizeHints *Hints) - : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI), TheFunction(F), Hints(Hints) {} + AssumptionTracker *AT, const Function *F, + const LoopVectorizeHints *Hints) + : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI), + AT(AT), TheFunction(F), Hints(Hints) { + CodeMetrics::collectEphemeralValues(L, AT, EphValues); + } /// Information about vectorization costs struct VectorizationFactor { @@ -954,6 +960,9 @@ private: *TheFunction, DL, Message.str()); } + /// Values used only by @llvm.assume calls. + SmallPtrSet EphValues; + /// The loop that we evaluate. Loop *TheLoop; /// Scev analysis. @@ -968,6 +977,8 @@ private: const DataLayout *DL; /// Target Library Info. const TargetLibraryInfo *TLI; + /// Tracker for @llvm.assume. + AssumptionTracker *AT; const Function *TheFunction; // Loop Vectorize Hint. const LoopVectorizeHints *Hints; @@ -1251,6 +1262,7 @@ struct LoopVectorize : public FunctionPass { BlockFrequencyInfo *BFI; TargetLibraryInfo *TLI; AliasAnalysis *AA; + AssumptionTracker *AT; bool DisableUnrolling; bool AlwaysVectorize; @@ -1266,6 +1278,7 @@ struct LoopVectorize : public FunctionPass { BFI = &getAnalysis(); TLI = getAnalysisIfAvailable(); AA = &getAnalysis(); + AT = &getAnalysis(); // Compute some weights outside of the loop over the loops. Compute this // using a BranchProbability to re-use its scaling math. @@ -1384,7 +1397,8 @@ struct LoopVectorize : public FunctionPass { } // Use the cost model. - LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, F, &Hints); + LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, AT, F, + &Hints); // Check the function attributes to find out if this function should be // optimized for size. @@ -1471,6 +1485,7 @@ struct LoopVectorize : public FunctionPass { } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addRequired(); @@ -3361,6 +3376,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); assert(ID && "Not an intrinsic call!"); switch (ID) { + case Intrinsic::assume: case Intrinsic::lifetime_end: case Intrinsic::lifetime_start: scalarizeInstruction(it); @@ -5457,6 +5473,10 @@ unsigned LoopVectorizationCostModel::getWidestType() { for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { Type *T = it->getType(); + // Ignore ephemeral values. + if (EphValues.count(it)) + continue; + // Only examine Loads, Stores and PHINodes. if (!isa(it) && !isa(it) && !isa(it)) continue; @@ -5719,6 +5739,10 @@ LoopVectorizationCostModel::calculateRegisterUsage() { // Ignore instructions that are never used within the loop. if (!Ends.count(I)) continue; + // Ignore ephemeral values. + if (EphValues.count(I)) + continue; + // Remove all of the instructions that end at this location. InstrList &List = TransposeEnds[i]; for (unsigned int j=0, e = List.size(); j < e; ++j) @@ -5759,6 +5783,10 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { if (isa(it)) continue; + // Ignore ephemeral values. + if (EphValues.count(it)) + continue; + unsigned C = getInstructionCost(it, VF); // Check if we should override the cost. @@ -6059,6 +6087,7 @@ static const char lv_name[] = "Loop Vectorization"; INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false) INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) diff --git a/test/Transforms/LoopVectorize/X86/assume.ll b/test/Transforms/LoopVectorize/X86/assume.ll new file mode 100644 index 00000000000..a94e24dd7e9 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/assume.ll @@ -0,0 +1,100 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @test1(float* noalias nocapture %a, float* noalias nocapture readonly %b) #0 { +entry: + br label %for.body + +; CHECK-LABEL: @test1 +; CHECK: vector.body: +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: for.body: +; CHECK: ret void + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp1 = fcmp ogt float %0, 1.000000e+02 + tail call void @llvm.assume(i1 %cmp1) + %add = fadd float %0, 1.000000e+00 + %arrayidx5 = getelementptr inbounds float* %a, i64 %indvars.iv + store float %add, float* %arrayidx5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv, 1599 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; Function Attrs: nounwind +declare void @llvm.assume(i1) #1 + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } + +%struct.data = type { float*, float* } + +; Function Attrs: nounwind uwtable +define void @test2(%struct.data* nocapture readonly %d) #0 { +entry: + %b = getelementptr inbounds %struct.data* %d, i64 0, i32 1 + %0 = load float** %b, align 8 + %ptrint = ptrtoint float* %0 to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + %a = getelementptr inbounds %struct.data* %d, i64 0, i32 0 + %1 = load float** %a, align 8 + %ptrint2 = ptrtoint float* %1 to i64 + %maskedptr3 = and i64 %ptrint2, 31 + %maskcond4 = icmp eq i64 %maskedptr3, 0 + br label %for.body + +; CHECK-LABEL: @test2 +; CHECK: vector.body: +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: for.body: +; CHECK: ret void + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + tail call void @llvm.assume(i1 %maskcond) + %arrayidx = getelementptr inbounds float* %0, i64 %indvars.iv + %2 = load float* %arrayidx, align 4 + %add = fadd float %2, 1.000000e+00 + tail call void @llvm.assume(i1 %maskcond4) + %arrayidx5 = getelementptr inbounds float* %1, i64 %indvars.iv + store float %add, float* %arrayidx5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv, 1599 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} +