mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-15 07:34:33 +00:00
Allow loop-idiom to run on multiple BB loops, but still only scan the loop
header for now for memset/memcpy opportunities. It turns out that loop-rotate is successfully rotating loops, but *DOESN'T MERGE THE BLOCKS*, turning "for loops" into 2 basic block loops that loop-idiom was ignoring. With this fix, we form many *many* more memcpy and memsets than before, including on the "history" loops in the viterbi benchmark, which look like this: for (j=0; j<MAX_history; ++j) { history_new[i][j+1] = history[2*i][j]; } Transforming these loops into memcpy's speeds up the viterbi benchmark from 11.98s to 3.55s on my machine. Woo. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122685 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c9e152b778
commit
cf078f2b20
@ -414,14 +414,6 @@ this construct.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
[LOOP RECOGNITION]
|
||||
|
||||
viterbi speeds up *significantly* if the various "history" related copy loops
|
||||
are turned into memcpy calls at the source level. We need a "loops to memcpy"
|
||||
pass.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
[LOOP OPTIMIZATION]
|
||||
|
||||
SingleSource/Benchmarks/Misc/dt.c shows several interesting optimization
|
||||
|
@ -126,11 +126,6 @@ static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
|
||||
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
|
||||
CurLoop = L;
|
||||
|
||||
// We only look at trivial single basic block loops.
|
||||
// TODO: eventually support more complex loops, scanning the header.
|
||||
if (L->getBlocks().size() != 1)
|
||||
return false;
|
||||
|
||||
// The trip count of the loop must be analyzable.
|
||||
SE = &getAnalysis<ScalarEvolution>();
|
||||
if (!SE->hasLoopInvariantBackedgeTakenCount(L))
|
||||
@ -142,6 +137,11 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
|
||||
TD = getAnalysisIfAvailable<TargetData>();
|
||||
if (TD == 0) return false;
|
||||
|
||||
// TODO: We currently only scan the header of the loop, because it is the only
|
||||
// part that is known to execute and we don't want to make a conditional store
|
||||
// into an unconditional one in the preheader. However, there can be diamonds
|
||||
// and other things in the loop that would make other blocks "always executed"
|
||||
// we should get the full set and scan each block.
|
||||
BasicBlock *BB = L->getHeader();
|
||||
DEBUG(dbgs() << "loop-idiom Scanning: F[" << BB->getParent()->getName()
|
||||
<< "] Loop %" << BB->getName() << "\n");
|
||||
|
@ -21,6 +21,30 @@ for.end: ; preds = %for.body, %entry
|
||||
; CHECK-NOT: store
|
||||
}
|
||||
|
||||
; This is a loop that was rotated but where the blocks weren't merged. This
|
||||
; shouldn't perturb us.
|
||||
define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
|
||||
bb.nph: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %bb.nph, %for.body
|
||||
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
|
||||
%I.0.014 = getelementptr i8* %Base, i64 %indvar
|
||||
store i8 0, i8* %I.0.014, align 1
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
br label %for.body.cont
|
||||
for.body.cont:
|
||||
%exitcond = icmp eq i64 %indvar.next, %Size
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
; CHECK: @test1a
|
||||
; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
|
||||
; CHECK-NOT: store
|
||||
}
|
||||
|
||||
|
||||
define void @test2(i32* %Base, i64 %Size) nounwind ssp {
|
||||
entry:
|
||||
%cmp10 = icmp eq i64 %Size, 0
|
||||
|
Loading…
x
Reference in New Issue
Block a user