From cf078f2b20899a3a19fb2044cc08dff409f13276 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 2 Jan 2011 07:58:36 +0000 Subject: [PATCH] Allow loop-idiom to run on multiple BB loops, but still only scan the loop header for now for memset/memcpy opportunities. It turns out that loop-rotate is successfully rotating loops, but *DOESN'T MERGE THE BLOCKS*, turning "for loops" into 2 basic block loops that loop-idiom was ignoring. With this fix, we form many *many* more memcpy and memsets than before, including on the "history" loops in the viterbi benchmark, which look like this: for (j=0; jgetBlocks().size() != 1) - return false; - // The trip count of the loop must be analyzable. SE = &getAnalysis(); if (!SE->hasLoopInvariantBackedgeTakenCount(L)) @@ -142,6 +137,11 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { TD = getAnalysisIfAvailable(); if (TD == 0) return false; + // TODO: We currently only scan the header of the loop, because it is the only + // part that is known to execute and we don't want to make a conditional store + // into an unconditional one in the preheader. However, there can be diamonds + // and other things in the loop that would make other blocks "always executed" + // we should get the full set and scan each block. BasicBlock *BB = L->getHeader(); DEBUG(dbgs() << "loop-idiom Scanning: F[" << BB->getParent()->getName() << "] Loop %" << BB->getName() << "\n"); diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll index 8929fe48015..589fea4f3a3 100644 --- a/test/Transforms/LoopIdiom/basic.ll +++ b/test/Transforms/LoopIdiom/basic.ll @@ -21,6 +21,30 @@ for.end: ; preds = %for.body, %entry ; CHECK-NOT: store } +; This is a loop that was rotated but where the blocks weren't merged. This +; shouldn't perturb us. 
+define void @test1a(i8* %Base, i64 %Size) nounwind ssp { +bb.nph: ; entry block, no predecessors + br label %for.body + +for.body: ; preds = %bb.nph, %for.body.cont + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ] + %I.0.014 = getelementptr i8* %Base, i64 %indvar + store i8 0, i8* %I.0.014, align 1 + %indvar.next = add i64 %indvar, 1 + br label %for.body.cont +for.body.cont: ; preds = %for.body + %exitcond = icmp eq i64 %indvar.next, %Size + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body.cont + ret void +; CHECK: @test1a +; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false) +; CHECK-NOT: store +} + + define void @test2(i32* %Base, i64 %Size) nounwind ssp { entry: %cmp10 = icmp eq i64 %Size, 0