mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-02 07:32:52 +00:00
LoopVectorize: Hoist conditional loads if possible
InstCombine can be uncooperative to vectorization and sink loads into conditional blocks. This prevents vectorization. Undo this optimization if there are unconditional memory accesses to the same addresses in the loop. radar://13815763 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181860 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a88d974ce2
commit
1386692ef6
@ -318,6 +318,93 @@ private:
|
||||
ValueMap WidenMap;
|
||||
};
|
||||
|
||||
/// \brief Check if conditionally executed loads are hoistable.
|
||||
///
|
||||
/// This class has two functions. isHoistableLoad and canHoistAllLoads.
|
||||
/// isHoistableLoad should be called on all load instructions that are executed
|
||||
/// conditionally. After all conditional loads are processed, the client should
|
||||
/// call canHoistAllLoads to determine if all of the conditional execute loads
|
||||
/// have an unconditional memory access in the loop.
|
||||
class LoadHoisting {
|
||||
typedef SmallPtrSet<Value *, 8> MemorySet;
|
||||
|
||||
Loop *TheLoop;
|
||||
DominatorTree *DT;
|
||||
MemorySet CondLoadAddrSet;
|
||||
|
||||
public:
|
||||
LoadHoisting(Loop *L, DominatorTree *D) : TheLoop(L), DT(D) {}
|
||||
|
||||
/// \brief Check if the instruction is a load with a identifiable address.
|
||||
bool isHoistableLoad(Instruction *L);
|
||||
|
||||
/// \brief Check if all of the conditional loads are hoistable because there
|
||||
/// exists an unconditional memory access to the same address in the loop.
|
||||
bool canHoistAllLoads();
|
||||
};
|
||||
|
||||
/// Records the pointer operand of \p L in CondLoadAddrSet when \p L is a load
/// instruction and reports whether it was one.
bool LoadHoisting::isHoistableLoad(Instruction *L) {
  if (LoadInst *Load = dyn_cast<LoadInst>(L)) {
    // Remember the address; canHoistAllLoads() later looks for an
    // unconditional access through the same pointer.
    CondLoadAddrSet.insert(Load->getPointerOperand());
    return true;
  }

  // Anything that is not a load is never considered hoistable here.
  return false;
}
|
||||
|
||||
/// Adds the pointer operand of every load and every store in \p BB to \p Set.
/// All other instructions are ignored.
static void addMemAccesses(BasicBlock *BB, SmallPtrSet<Value *, 8> &Set) {
  for (BasicBlock::iterator It = BB->begin(), End = BB->end(); It != End;
       ++It) {
    Instruction *Inst = &*It;

    if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
      Set.insert(Load->getPointerOperand());
    else if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
      Set.insert(Store->getPointerOperand());
  }
}
|
||||
|
||||
bool LoadHoisting::canHoistAllLoads() {
|
||||
// No conditional loads.
|
||||
if (CondLoadAddrSet.empty())
|
||||
return true;
|
||||
|
||||
MemorySet UncondMemAccesses;
|
||||
std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector();
|
||||
BasicBlock *LoopLatch = TheLoop->getLoopLatch();
|
||||
|
||||
// Iterate over the unconditional blocks and collect memory access addresses.
|
||||
for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
|
||||
BasicBlock *BB = LoopBlocks[i];
|
||||
|
||||
// Ignore conditional blocks.
|
||||
if (BB != LoopLatch && !DT->dominates(BB, LoopLatch))
|
||||
continue;
|
||||
|
||||
addMemAccesses(BB, UncondMemAccesses);
|
||||
}
|
||||
|
||||
// And make sure there is a matching unconditional access for every
|
||||
// conditional load.
|
||||
for (MemorySet::iterator MI = CondLoadAddrSet.begin(),
|
||||
ME = CondLoadAddrSet.end(); MI != ME; ++MI)
|
||||
if (!UncondMemAccesses.count(*MI))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
|
||||
/// to what vectorization factor.
|
||||
/// This class does not look at the profitability of vectorization, only the
|
||||
@ -337,7 +424,8 @@ public:
|
||||
DominatorTree *DT, TargetTransformInfo* TTI,
|
||||
AliasAnalysis *AA, TargetLibraryInfo *TLI)
|
||||
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
|
||||
Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false) {}
|
||||
Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
|
||||
LoadSpeculation(L, DT) {}
|
||||
|
||||
/// This enum represents the kinds of reductions that we support.
|
||||
enum ReductionKind {
|
||||
@ -598,6 +686,9 @@ private:
|
||||
RuntimePointerCheck PtrRtCheck;
|
||||
/// Can we assume the absence of NaNs.
|
||||
bool HasFunNoNaNAttr;
|
||||
|
||||
/// Utility to determine whether loads can be speculated.
|
||||
LoadHoisting LoadSpeculation;
|
||||
};
|
||||
|
||||
/// LoopVectorizationCostModel - estimates the expected speedups due to
|
||||
@ -3259,8 +3350,12 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
|
||||
|
||||
bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
|
||||
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
|
||||
// We don't predicate loads/stores at the moment.
|
||||
if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
|
||||
// We might be able to hoist the load.
|
||||
if (it->mayReadFromMemory() && !LoadSpeculation.isHoistableLoad(it))
|
||||
return false;
|
||||
|
||||
// We don't predicate stores at the moment.
|
||||
if (it->mayWriteToMemory() || it->mayThrow())
|
||||
return false;
|
||||
|
||||
// The instructions below can trap.
|
||||
@ -3274,6 +3369,10 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
|
||||
}
|
||||
}
|
||||
|
||||
// Check that we can actually speculate the hoistable loads.
|
||||
if (!LoadSpeculation.canHoistAllLoads())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
69
test/Transforms/LoopVectorize/hoist-loads.ll
Normal file
69
test/Transforms/LoopVectorize/hoist-loads.ll
Normal file
@ -0,0 +1,69 @@
|
||||
; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
|
||||
@A = common global [1024 x float] zeroinitializer, align 16
|
||||
@B = common global [1024 x float] zeroinitializer, align 16
|
||||
|
||||
; Make sure we can vectorize in the presence of hoistable conditional loads.
|
||||
; CHECK: hoist_cond_load
|
||||
; CHECK: load <2 x float>
|
||||
|
||||
define void @hoist_cond_load() {
entry:
  br label %for.body

; Loop header: computes the addresses of A[i] and B[i], loads B[i]
; unconditionally and branches on whether it equals zero.
for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %if.end9 ]
  %a.addr = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %iv
  %b.addr = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %iv
  %b.val = load float* %b.addr, align 4
  %is.zero = fcmp oeq float %b.val, 0.000000e+00
  br i1 %is.zero, label %if.end9, label %if.else

; Conditionally executed load from A[i].
if.else:
  %a.val = load float* %a.addr, align 4
  br label %if.end9

; Latch: stores to A[i] unconditionally, i.e. through the same pointer the
; conditional load above reads from.
if.end9:
  %result = phi float [ %a.val, %if.else ], [ 0.000000e+00, %for.body ]
  store float %result, float* %a.addr, align 4
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %continue = icmp ne i32 %iv.trunc, 1024
  br i1 %continue, label %for.body, label %for.end

for.end:
  ret void
}
|
||||
|
||||
; However, we can't hoist loads whose address we have not seen unconditionally
|
||||
; accessed.
|
||||
; CHECK: dont_hoist_cond_load
|
||||
; CHECK-NOT: load <2 x float>
|
||||
|
||||
define void @dont_hoist_cond_load() {
entry:
  br label %for.body

; Loop header: computes the addresses of A[i] and B[i], loads B[i]
; unconditionally and branches on whether it equals zero.
for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %if.end9 ]
  %a.addr = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %iv
  %b.addr = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %iv
  %b.val = load float* %b.addr, align 4
  %is.zero = fcmp oeq float %b.val, 0.000000e+00
  br i1 %is.zero, label %if.end9, label %if.else

; Conditionally executed load from A[i].
if.else:
  %a.val = load float* %a.addr, align 4
  br label %if.end9

; Latch: the store goes to B[i], so A[i] is only ever touched by the
; conditional load above and never accessed unconditionally.
if.end9:
  %result = phi float [ %a.val, %if.else ], [ 0.000000e+00, %for.body ]
  store float %result, float* %b.addr, align 4
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %continue = icmp ne i32 %iv.trunc, 1024
  br i1 %continue, label %for.body, label %for.end

for.end:
  ret void
}
|
Loading…
Reference in New Issue
Block a user