From b1831c66403315a1d84593b7c198ddbd43a574cf Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Thu, 11 Aug 2011 23:36:16 +0000 Subject: [PATCH] Allow loop unrolling to get known trip counts from ScalarEvolution. SCEV unrolling can unroll loops with arbitrary induction variables. It is a prerequisite for -disable-iv-rewrite performance. It also easily handles loops of arbitrary structure including multiple exits and is generally more robust. This is under a temporary option to avoid affecting default behavior for the next couple of weeks. It is needed so that I can check in unit tests for updateUnloop. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137384 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/ScalarEvolution.h | 15 +- lib/Analysis/ScalarEvolution.cpp | 57 ++++++++ lib/Transforms/Scalar/LoopUnrollPass.cpp | 30 +++- test/Transforms/LoopUnroll/scevunroll.ll | 172 +++++++++++++++++++++++ 4 files changed, 266 insertions(+), 8 deletions(-) create mode 100644 test/Transforms/LoopUnroll/scevunroll.ll diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index c621bec86a7..6e30b317714 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -507,7 +507,8 @@ namespace llvm { /// FoundLHS, and FoundRHS is true. bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - const SCEV *FoundLHS, const SCEV *FoundRHS); + const SCEV *FoundLHS, + const SCEV *FoundRHS); /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is /// in the header of its containing loop, we know the loop executes a @@ -710,6 +711,18 @@ namespace llvm { bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); + /// getSmallConstantTripCount - Returns the maximum trip count of this loop + /// as a normal unsigned value, if possible. 
Returns 0 if the trip count is + /// unknown or not constant. + unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitBlock); + + /// getSmallConstantTripMultiple - Returns the largest constant divisor of + /// the trip count of this loop as a normal unsigned value, if + /// possible. This means that the actual trip count is always a multiple of + /// the returned value (don't forget the trip count could very well be zero + /// as well!). + unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitBlock); + // getExitCount - Get the expression for the number of loop iterations for // which this loop is guaranteed not to exit via ExitingBlock. Otherwise // return SCEVCouldNotCompute. diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 487bec6a39a..202e715aba3 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -3830,6 +3830,63 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Iteration Count Computation Code // +/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a +/// normal unsigned value, if possible. Returns 0 if the trip count is unknown +/// or not constant. Will also return 0 if the maximum trip count is very large +/// (>= 2^32) +unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, + BasicBlock *ExitBlock) { + const SCEVConstant *ExitCount = + dyn_cast(getExitCount(L, ExitBlock)); + if (!ExitCount) + return 0; + + ConstantInt *ExitConst = ExitCount->getValue(); + + // Guard against huge trip counts. + if (ExitConst->getValue().getActiveBits() > 32) + return 0; + + // In case of integer overflow, this returns 0, which is correct. + return ((unsigned)ExitConst->getZExtValue()) + 1; +} + +/// getSmallConstantTripMultiple - Returns the largest constant divisor of the +/// trip count of this loop as a normal unsigned value, if possible. 
This +/// means that the actual trip count is always a multiple of the returned +/// value (don't forget the trip count could very well be zero as well!). +/// +/// Returns 1 if the trip count is unknown or not guaranteed to be the +/// multiple of a constant (which is also the case if the trip count is simply +/// constant, use getSmallConstantTripCount for that case). Will also return 1 +/// if the trip count is very large (>= 2^32). +unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L, + BasicBlock *ExitBlock) { + const SCEV *ExitCount = getExitCount(L, ExitBlock); + if (ExitCount == getCouldNotCompute()) + return 1; + + // Get the trip count from the BE count by adding 1. + const SCEV *TCMul = getAddExpr(ExitCount, + getConstant(ExitCount->getType(), 1)); + // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt + // to factor simple cases. + if (const SCEVMulExpr *Mul = dyn_cast(TCMul)) + TCMul = Mul->getOperand(0); + + const SCEVConstant *MulC = dyn_cast(TCMul); + if (!MulC) + return 1; + + ConstantInt *Result = MulC->getValue(); + + // Guard against huge trip counts. + if (!Result || Result->getValue().getActiveBits() > 32) + return 1; + + return (unsigned)Result->getZExtValue(); +} + // getExitCount - Get the expression for the number of loop iterations for which // this loop is guaranteed not to exit via ExitintBlock. Otherwise return // SCEVCouldNotCompute. diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 94afff6813d..dab3ac42eaf 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -39,6 +39,11 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached.")); +// Temporary flag to be made default shortly. 
+static cl::opt +UnrollWithSCEV("unroll-scev", cl::init(false), cl::Hidden, + cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling")); + namespace { class LoopUnroll : public LoopPass { public: @@ -121,6 +126,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) { bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { LoopInfo *LI = &getAnalysis(); + ScalarEvolution *SE = &getAnalysis(); BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() @@ -136,14 +142,24 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { Header->getParent()->hasFnAttr(Attribute::OptimizeForSize)) Threshold = OptSizeUnrollThreshold; - // Find trip count - unsigned TripCount = L->getSmallConstantTripCount(); - - // Find trip multiple if count is not available + // Find trip count and trip multiple if count is not available + unsigned TripCount = 0; unsigned TripMultiple = 1; - if (TripCount == 0) - TripMultiple = L->getSmallConstantTripMultiple(); - + if (UnrollWithSCEV) { + // Find "latch trip count". UnrollLoop assumes that control cannot exit + // via the loop latch on any iteration prior to TripCount. The loop may exit + // early via an earlier branch. + BasicBlock *LatchBlock = L->getLoopLatch(); + if (LatchBlock) { + TripCount = SE->getSmallConstantTripCount(L, LatchBlock); + TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); + } + } + else { + TripCount = L->getSmallConstantTripCount(); + if (TripCount == 0) + TripMultiple = L->getSmallConstantTripMultiple(); + } // Automatically select an unroll count. 
unsigned Count = CurrentCount; if (Count == 0) { diff --git a/test/Transforms/LoopUnroll/scevunroll.ll b/test/Transforms/LoopUnroll/scevunroll.ll new file mode 100644 index 00000000000..0f5fbe4e9c2 --- /dev/null +++ b/test/Transforms/LoopUnroll/scevunroll.ll @@ -0,0 +1,172 @@ +; RUN: opt < %s -S -indvars -loop-unroll -verify-loop-info -unroll-scev | FileCheck %s +; +; Unit tests for loop unrolling using ScalarEvolution to compute trip counts. +; +; Indvars is run first to generate an "old" SCEV result. Some unit +; tests may check that SCEV is properly invalidated between passes. + +; Completely unroll loops without a canonical IV. +; +; CHECK: @sansCanonical +; CHECK-NOT: phi +; CHECK-NOT: icmp +; CHECK: ret +define i32 @sansCanonical(i32* %base) nounwind { +entry: + br label %while.body + +while.body: + %iv = phi i64 [ 10, %entry ], [ %iv.next, %while.body ] + %sum = phi i32 [ 0, %entry ], [ %sum.next, %while.body ] + %iv.next = add i64 %iv, -1 + %adr = getelementptr inbounds i32* %base, i64 %iv.next + %tmp = load i32* %adr, align 8 + %sum.next = add i32 %sum, %tmp + %iv.narrow = trunc i64 %iv.next to i32 + %cmp.i65 = icmp sgt i32 %iv.narrow, 0 + br i1 %cmp.i65, label %while.body, label %exit + +exit: + ret i32 %sum +} + +; SCEV unrolling properly handles loops with multiple exits. In this +; case, the computed trip count based on a canonical IV is *not* for a +; latch block. Canonical unrolling incorrectly unrolls it, but SCEV +; unrolling does not. 
+; +; CHECK: @earlyLoopTest +; CHECK: tail: +; CHECK-NOT: br +; CHECK: br i1 %cmp2, label %loop, label %exit2 +define i64 @earlyLoopTest(i64* %base) nounwind { +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %inc, %tail ] + %s = phi i64 [ 0, %entry ], [ %s.next, %tail ] + %adr = getelementptr i64* %base, i64 %iv + %val = load i64* %adr + %s.next = add i64 %s, %val + %inc = add i64 %iv, 1 + %cmp = icmp ne i64 %inc, 4 + br i1 %cmp, label %tail, label %exit1 + +tail: + %cmp2 = icmp ne i64 %val, 0 + br i1 %cmp2, label %loop, label %exit2 + +exit1: + ret i64 %s + +exit2: + ret i64 %s.next +} + +; SCEV properly unrolls multi-exit loops. +; +; CHECK: @multiExit +; CHECK: getelementptr i32* %base, i64 10 +; CHECK-NEXT: load i32* +; CHECK: br i1 false, label %l2.10, label %exit1 +; CHECK: l2.10: +; CHECK-NOT: br +; CHECK: ret i32 +define i32 @multiExit(i32* %base) nounwind { +entry: + br label %l1 +l1: + %iv1 = phi i32 [ 0, %entry ], [ %inc1, %l2 ] + %iv2 = phi i32 [ 0, %entry ], [ %inc2, %l2 ] + %inc1 = add i32 %iv1, 1 + %inc2 = add i32 %iv2, 1 + %adr = getelementptr i32* %base, i32 %iv1 + %val = load i32* %adr + %cmp1 = icmp slt i32 %iv1, 5 + br i1 %cmp1, label %l2, label %exit1 +l2: + %cmp2 = icmp slt i32 %iv2, 10 + br i1 %cmp2, label %l1, label %exit2 +exit1: + ret i32 1 +exit2: + ret i32 %val +} + + +; SCEV should not unroll a multi-exit loop unless the latch block has +; a known trip count, regardless of the early exit trip counts. The +; LoopUnroll utility uses this assumption to optimize the latch +; block's branch. 
+; +; CHECK: @multiExit +; CHECK: l3: +; CHECK-NOT: br +; CHECK: br i1 %cmp3, label %l1, label %exit3 +define i32 @multiExitIncomplete(i32* %base) nounwind { +entry: + br label %l1 +l1: + %iv1 = phi i32 [ 0, %entry ], [ %inc1, %l3 ] + %iv2 = phi i32 [ 0, %entry ], [ %inc2, %l3 ] + %inc1 = add i32 %iv1, 1 + %inc2 = add i32 %iv2, 1 + %adr = getelementptr i32* %base, i32 %iv1 + %val = load i32* %adr + %cmp1 = icmp slt i32 %iv1, 5 + br i1 %cmp1, label %l2, label %exit1 +l2: + %cmp2 = icmp slt i32 %iv2, 10 + br i1 %cmp2, label %l3, label %exit2 +l3: + %cmp3 = icmp ne i32 %val, 0 + br i1 %cmp3, label %l1, label %exit3 + +exit1: + ret i32 1 +exit2: + ret i32 2 +exit3: + ret i32 3 +} + +; When loop unroll merges a loop exit with one of its parent loop's +; exits, SCEV must forget its ExitNotTaken info. +; +; CHECK: @nestedUnroll +; CHECK-NOT: br i1 +; CHECK: for.body87: +define void @nestedUnroll() nounwind { +entry: + br label %for.inc + +for.inc: + br i1 false, label %for.inc, label %for.body38.preheader + +for.body38.preheader: + br label %for.body38 + +for.body38: + %i.113 = phi i32 [ %inc76, %for.inc74 ], [ 0, %for.body38.preheader ] + %mul48 = mul nsw i32 %i.113, 6 + br label %for.body43 + +for.body43: + %j.011 = phi i32 [ 0, %for.body38 ], [ %inc72, %for.body43 ] + %add49 = add nsw i32 %j.011, %mul48 + %sh_prom50 = zext i32 %add49 to i64 + %inc72 = add nsw i32 %j.011, 1 + br i1 false, label %for.body43, label %for.inc74 + +for.inc74: + %inc76 = add nsw i32 %i.113, 1 + br i1 false, label %for.body38, label %for.body87.preheader + +for.body87.preheader: + br label %for.body87 + +for.body87: + br label %for.body87 +} +