LoopVectorize: Hoist conditional loads if possible

InstCombine can be uncooperative to vectorization and sink loads into
conditional blocks. This prevents vectorization.

Undo this optimization if there are unconditional memory accesses to the same
addresses in the loop.

radar://13815763

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181860 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Arnold Schwaighofer 2013-05-15 01:44:30 +00:00
parent a88d974ce2
commit 1386692ef6
2 changed files with 171 additions and 3 deletions

View File

@ -318,6 +318,93 @@ private:
ValueMap WidenMap;
};
/// \brief Decide whether conditionally executed loads can be hoisted.
///
/// The class is used in two steps. First, isHoistableLoad is called on every
/// instruction that reads memory in a conditionally executed block; for load
/// instructions it records the address being read. Once all conditional loads
/// have been visited, canHoistAllLoads reports whether every recorded address
/// is also accessed unconditionally somewhere in the loop.
class LoadHoisting {
  typedef SmallPtrSet<Value *, 8> MemorySet;

  /// The loop whose blocks are scanned for unconditional memory accesses.
  Loop *TheLoop;
  /// Dominator tree used to tell unconditional blocks from conditional ones.
  DominatorTree *DT;
  /// Pointer operands of all conditional loads recorded so far.
  MemorySet CondLoadAddrSet;

public:
  LoadHoisting(Loop *TargetLoop, DominatorTree *DomTree)
      : TheLoop(TargetLoop), DT(DomTree) {}

  /// \brief Check if the instruction is a load with an identifiable address.
  ///
  /// Records the load's pointer operand as a side effect when it returns true.
  bool isHoistableLoad(Instruction *L);

  /// \brief Check if all of the conditional loads are hoistable because there
  /// exists an unconditional memory access to the same address in the loop.
  bool canHoistAllLoads();
};
/// Returns true if \p L is a load instruction and remembers the address it
/// reads from, so that canHoistAllLoads can later look for an unconditional
/// access to the same address. Any other instruction is rejected and nothing is
/// recorded.
///
/// NOTE(review): volatile and atomic loads are not filtered out here;
/// presumably a later legality check rejects them -- confirm.
bool LoadHoisting::isHoistableLoad(Instruction *L) {
  if (LoadInst *CondLoad = dyn_cast<LoadInst>(L)) {
    CondLoadAddrSet.insert(CondLoad->getPointerOperand());
    return true;
  }
  return false;
}
/// Insert the pointer operand of every load and store in \p BB into \p Set.
/// Instructions that are neither a load nor a store are skipped.
static void addMemAccesses(BasicBlock *BB, SmallPtrSet<Value *, 8> &Set) {
  for (BasicBlock::iterator InstIt = BB->begin(), InstEnd = BB->end();
       InstIt != InstEnd; ++InstIt) {
    Instruction *Inst = &*InstIt;

    if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
      Set.insert(Load->getPointerOperand());
    else if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
      Set.insert(Store->getPointerOperand());
  }
}
bool LoadHoisting::canHoistAllLoads() {
// No conditional loads.
if (CondLoadAddrSet.empty())
return true;
MemorySet UncondMemAccesses;
std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector();
BasicBlock *LoopLatch = TheLoop->getLoopLatch();
// Iterate over the unconditional blocks and collect memory access addresses.
for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
BasicBlock *BB = LoopBlocks[i];
// Ignore conditional blocks.
if (BB != LoopLatch && !DT->dominates(BB, LoopLatch))
continue;
addMemAccesses(BB, UncondMemAccesses);
}
// And make sure there is a matching unconditional access for every
// conditional load.
for (MemorySet::iterator MI = CondLoadAddrSet.begin(),
ME = CondLoadAddrSet.end(); MI != ME; ++MI)
if (!UncondMemAccesses.count(*MI))
return false;
return true;
}
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
/// to what vectorization factor.
/// This class does not look at the profitability of vectorization, only the
@ -337,7 +424,8 @@ public:
DominatorTree *DT, TargetTransformInfo* TTI,
AliasAnalysis *AA, TargetLibraryInfo *TLI)
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false) {}
Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
LoadSpeculation(L, DT) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@ -598,6 +686,9 @@ private:
RuntimePointerCheck PtrRtCheck;
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
/// Utility to determine whether loads can be speculated.
LoadHoisting LoadSpeculation;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@ -3259,8 +3350,12 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
// We don't predicate loads/stores at the moment.
if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
// We might be able to hoist the load.
if (it->mayReadFromMemory() && !LoadSpeculation.isHoistableLoad(it))
return false;
// We don't predicate stores at the moment.
if (it->mayWriteToMemory() || it->mayThrow())
return false;
// The instructions below can trap.
@ -3274,6 +3369,10 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
}
}
// Check that we can actually speculate the hoistable loads.
if (!LoadSpeculation.canHoistAllLoads())
return false;
return true;
}

View File

@ -0,0 +1,69 @@
; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float] zeroinitializer, align 16
; Make sure we can vectorize in the presence of hoistable conditional loads.
; CHECK: hoist_cond_load
; CHECK: load <2 x float>
; Positive case: the loop runs 1024 iterations over index %indvars.iv.
; B[i] is loaded unconditionally and compared against 0.0. A[i] is loaded only
; in %if.else, i.e. conditionally, but A[i] is also stored unconditionally in
; %if.end9, so the conditional load has a matching unconditional access.
define void @hoist_cond_load() {
entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
; %arrayidx = &A[i], %arrayidx2 = &B[i]
%arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv
%arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv
%0 = load float* %arrayidx2, align 4
%cmp3 = fcmp oeq float %0, 0.000000e+00
; Skip the load of A[i] when B[i] == 0.0.
br i1 %cmp3, label %if.end9, label %if.else
if.else:
; Conditional load of A[i].
%1 = load float* %arrayidx, align 4
br label %if.end9
if.end9:
%tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
; Unconditional store to the same address the conditional load reads (A[i]).
store float %tmp.0, float* %arrayidx, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp ne i32 %lftr.wideiv, 1024
br i1 %exitcond, label %for.body, label %for.end
for.end:
ret void
}
; However, we can't hoist loads whose address we have not seen unconditionally
; accessed.
; CHECK: dont_hoist_cond_load
; CHECK-NOT: load <2 x float>
; Negative case: same loop shape as @hoist_cond_load, except that the final
; store writes to B[i] instead of A[i]. A[i] is therefore only ever accessed by
; the conditional load in %if.else, with no unconditional access to that
; address anywhere in the loop.
define void @dont_hoist_cond_load() {
entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
; %arrayidx = &A[i], %arrayidx2 = &B[i]
%arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv
%arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv
%0 = load float* %arrayidx2, align 4
%cmp3 = fcmp oeq float %0, 0.000000e+00
; Skip the load of A[i] when B[i] == 0.0.
br i1 %cmp3, label %if.end9, label %if.else
if.else:
; Conditional load of A[i]; this is the only access to A[i] in the loop.
%1 = load float* %arrayidx, align 4
br label %if.end9
if.end9:
%tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
; Unconditional store goes to B[i], not to the conditionally loaded A[i].
store float %tmp.0, float* %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp ne i32 %lftr.wideiv, 1024
br i1 %exitcond, label %for.body, label %for.end
for.end:
ret void
}