diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 6d7901f88c6..94afff6813d 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -79,6 +79,7 @@ namespace { AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); + AU.addRequired(); AU.addPreserved(); // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. // If loop unroll does not preserve dom info then LCSSA pass on next @@ -187,12 +188,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Unroll the loop. - Function *F = L->getHeader()->getParent(); if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM)) return false; - // FIXME: Reconstruct dom info, because it is not preserved properly. - if (DominatorTree *DT = getAnalysisIfAvailable()) - DT->runOnFunction(*F); return true; } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 6b2f9efe150..ad398537797 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -29,6 +29,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; // TODO: Should these be here or in LoopUnroll? @@ -130,6 +131,9 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be /// removed from the LoopPassManager as well. LPM can also be NULL. +/// +/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are +/// available it must also preserve those analyses. 
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); @@ -163,7 +167,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. - if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable()) + ScalarEvolution *SE = LPM->getAnalysisIfAvailable(); + if (SE) SE->forgetLoop(L); if (TripCount != 0) @@ -374,6 +379,24 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, } } + // FIXME: Reconstruct dom info, because it is not preserved properly. + // Incrementally updating domtree after loop unrolling would be easy. + if (DominatorTree *DT = LPM->getAnalysisIfAvailable()) + DT->runOnFunction(*L->getHeader()->getParent()); + + // Simplify any new induction variables in the partially unrolled loop. + if (SE && !CompletelyUnroll) { + SmallVector DeadInsts; + simplifyLoopIVs(L, SE, LPM, DeadInsts); + + // Aggressively clean up dead instructions that simplifyLoopIVs already + // identified. Any remaining should be cleaned up below. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + } + // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. diff --git a/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll b/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll new file mode 100644 index 00000000000..5790603cfc2 --- /dev/null +++ b/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll @@ -0,0 +1,39 @@ +; RUN: opt -S < %s -loop-unroll -unroll-count=4 -disable-iv-rewrite | FileCheck %s +; +; Test induction variable simplify after loop unrolling. It should +; expose nice opportunities for GVN. 
+ +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" + +; PR10534: LoopUnroll not keeping canonical induction variable... +; CHECK: while.body.1: +; CHECK: %shr.1 = lshr i32 %bit_addr.addr.01, 5 +; CHECK: %arrayidx.1 = getelementptr inbounds i32* %bitmap, i32 %shr.1 +; CHECK: while.body.2: +; CHECK: %shr.2 = lshr i32 %bit_addr.addr.01, 5 +; CHECK: %arrayidx.2 = getelementptr inbounds i32* %bitmap, i32 %shr.2 +; CHECK: while.body.3: +; CHECK: %shr.3 = lshr i32 %bit_addr.addr.01, 5 +; CHECK: %arrayidx.3 = getelementptr inbounds i32* %bitmap, i32 %shr.3 +define void @FlipBit(i32* nocapture %bitmap, i32 %bit_addr, i32 %nbits) nounwind { +entry: + br label %while.body + +while.body: + %nbits.addr.02 = phi i32 [ 128, %entry ], [ %dec, %while.body ] + %bit_addr.addr.01 = phi i32 [ 0, %entry ], [ %inc, %while.body ] + %dec = add i32 %nbits.addr.02, -1 + %shr = lshr i32 %bit_addr.addr.01, 5 + %rem = and i32 %bit_addr.addr.01, 31 + %shl = shl i32 1, %rem + %arrayidx = getelementptr inbounds i32* %bitmap, i32 %shr + %tmp6 = load i32* %arrayidx, align 4 + %xor = xor i32 %tmp6, %shl + store i32 %xor, i32* %arrayidx, align 4 + %inc = add i32 %bit_addr.addr.01, 1 + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: + ret void +}