diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 0b29445dd01..a8e1f4579c7 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -318,6 +318,93 @@ private: ValueMap WidenMap; }; +/// \brief Check if conditionally executed loads are hoistable. +/// +/// This class has two functions. isHoistableLoad and canHoistAllLoads. +/// isHoistableLoad should be called on all load instructions that are executed +/// conditionally. After all conditional loads are processed, the client should +/// call canHoistAllLoads to determine if all of the conditional execute loads +/// have an unconditional memory access in the loop. +class LoadHoisting { + typedef SmallPtrSet MemorySet; + + Loop *TheLoop; + DominatorTree *DT; + MemorySet CondLoadAddrSet; + +public: + LoadHoisting(Loop *L, DominatorTree *D) : TheLoop(L), DT(D) {} + + /// \brief Check if the instruction is a load with a identifiable address. + bool isHoistableLoad(Instruction *L); + + /// \brief Check if all of the conditional loads are hoistable because there + /// exists an unconditional memory access to the same address in the loop. + bool canHoistAllLoads(); +}; + +bool LoadHoisting::isHoistableLoad(Instruction *L) { + LoadInst *LI = dyn_cast(L); + if (!LI) + return false; + + CondLoadAddrSet.insert(LI->getPointerOperand()); + return true; +} + +static void addMemAccesses(BasicBlock *BB, SmallPtrSet &Set) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { + Instruction *I = &*BI; + Value *Addr = 0; + + // Try a load. + LoadInst *LI = dyn_cast(I); + if (LI) { + Addr = LI->getPointerOperand(); + Set.insert(Addr); + continue; + } + + // Try a store. + StoreInst *SI = dyn_cast(I); + if (!SI) + continue; + + Addr = SI->getPointerOperand(); + Set.insert(Addr); + } +} + +bool LoadHoisting::canHoistAllLoads() { + // No conditional loads. + if (CondLoadAddrSet.empty()) + return true; + + MemorySet UncondMemAccesses; + std::vector &LoopBlocks = TheLoop->getBlocksVector(); + BasicBlock *LoopLatch = TheLoop->getLoopLatch(); + + // Iterate over the unconditional blocks and collect memory access addresses. + for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) { + BasicBlock *BB = LoopBlocks[i]; + + // Ignore conditional blocks. + if (BB != LoopLatch && !DT->dominates(BB, LoopLatch)) + continue; + + addMemAccesses(BB, UncondMemAccesses); + } + + // And make sure there is a matching unconditional access for every + // conditional load. + for (MemorySet::iterator MI = CondLoadAddrSet.begin(), + ME = CondLoadAddrSet.end(); MI != ME; ++MI) + if (!UncondMemAccesses.count(*MI)) + return false; + + return true; +} + /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and /// to what vectorization factor. /// This class does not look at the profitability of vectorization, only the @@ -337,7 +424,8 @@ public: DominatorTree *DT, TargetTransformInfo* TTI, AliasAnalysis *AA, TargetLibraryInfo *TLI) : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI), - Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false) {} + Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false), + LoadSpeculation(L, DT) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -598,6 +686,9 @@ private: RuntimePointerCheck PtrRtCheck; /// Can we assume the absence of NaNs. bool HasFunNoNaNAttr; + + /// Utility to determine whether loads can be speculated. + LoadHoisting LoadSpeculation; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -3259,8 +3350,12 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - // We don't predicate loads/stores at the moment. - if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow()) + // We might be able to hoist the load. + if (it->mayReadFromMemory() && !LoadSpeculation.isHoistableLoad(it)) + return false; + + // We predicate stores at the moment. + if (it->mayWriteToMemory() || it->mayThrow()) return false; // The instructions below can trap. @@ -3274,6 +3369,10 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { } } + // Check that we can actually speculate the hoistable loads. + if (!LoadSpeculation.canHoistAllLoads()) + return false; + return true; } diff --git a/test/Transforms/LoopVectorize/hoist-loads.ll b/test/Transforms/LoopVectorize/hoist-loads.ll new file mode 100644 index 00000000000..fad17350cd0 --- /dev/null +++ b/test/Transforms/LoopVectorize/hoist-loads.ll @@ -0,0 +1,69 @@ +; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +@A = common global [1024 x float] zeroinitializer, align 16 +@B = common global [1024 x float] zeroinitializer, align 16 + +; Make sure we can vectorize in the presence of hoistable conditional loads. +; CHECK: hoist_cond_load +; CHECK: load <2 x float> + +define void @hoist_cond_load() { +entry: + br label %for.body +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ] + %arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv + %arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx2, align 4 + %cmp3 = fcmp oeq float %0, 0.000000e+00 + br i1 %cmp3, label %if.end9, label %if.else + +if.else: + %1 = load float* %arrayidx, align 4 + br label %if.end9 + +if.end9: + %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ] + store float %tmp.0, float* %arrayidx, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.end: + ret void +} + +; However, we can't hoist loads whose address we have not seen unconditionally +; accessed. +; CHECK: dont_hoist_cond_load +; CHECK-NOT: load <2 x float> + +define void @dont_hoist_cond_load() { +entry: + br label %for.body +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ] + %arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv + %arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx2, align 4 + %cmp3 = fcmp oeq float %0, 0.000000e+00 + br i1 %cmp3, label %if.end9, label %if.else + +if.else: + %1 = load float* %arrayidx, align 4 + br label %if.end9 + +if.end9: + %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ] + store float %tmp.0, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.end: + ret void +}