diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 4e374e59d6d..f047d087cbf 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -414,14 +414,6 @@ this construct. //===---------------------------------------------------------------------===// -[LOOP RECOGNITION] - -viterbi speeds up *significantly* if the various "history" related copy loops -are turned into memcpy calls at the source level. We need a "loops to memcpy" -pass. - -//===---------------------------------------------------------------------===// - [LOOP OPTIMIZATION] SingleSource/Benchmarks/Misc/dt.c shows several interesting optimization diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index e9394cd5c02..84e33f062b0 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -126,11 +126,6 @@ static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; - // We only look at trivial single basic block loops. - // TODO: eventually support more complex loops, scanning the header. - if (L->getBlocks().size() != 1) - return false; - // The trip count of the loop must be analyzable. SE = &getAnalysis(); if (!SE->hasLoopInvariantBackedgeTakenCount(L)) @@ -142,6 +137,11 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { TD = getAnalysisIfAvailable(); if (TD == 0) return false; + // TODO: We currently only scan the header of the loop, because it is the only + // part that is known to execute and we don't want to make a conditional store + // into an unconditional one in the preheader. However, there can be diamonds + // and other things in the loop that would make other blocks "always + // executed"; we should get the full set and scan each block. 
BasicBlock *BB = L->getHeader(); DEBUG(dbgs() << "loop-idiom Scanning: F[" << BB->getParent()->getName() << "] Loop %" << BB->getName() << "\n"); diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll index 8929fe48015..589fea4f3a3 100644 --- a/test/Transforms/LoopIdiom/basic.ll +++ b/test/Transforms/LoopIdiom/basic.ll @@ -21,6 +21,30 @@ for.end: ; preds = %for.body, %entry ; CHECK-NOT: store } +; This is a loop that was rotated but where the blocks weren't merged. This +; shouldn't perturb us. +define void @test1a(i8* %Base, i64 %Size) nounwind ssp { +bb.nph: ; entry block, no predecessors + br label %for.body + +for.body: ; preds = %bb.nph, %for.body.cont + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ] + %I.0.014 = getelementptr i8* %Base, i64 %indvar + store i8 0, i8* %I.0.014, align 1 + %indvar.next = add i64 %indvar, 1 + br label %for.body.cont +for.body.cont: + %exitcond = icmp eq i64 %indvar.next, %Size + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body.cont + ret void +; CHECK: @test1a +; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false) +; CHECK-NOT: store +} + + define void @test2(i32* %Base, i64 %Size) nounwind ssp { entry: %cmp10 = icmp eq i64 %Size, 0