From b1831c66403315a1d84593b7c198ddbd43a574cf Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Thu, 11 Aug 2011 23:36:16 +0000
Subject: [PATCH] Allow loop unrolling to get known trip counts from
 ScalarEvolution.

SCEV unrolling can unroll loops with arbitrary induction variables. It
is a prerequisite for -disable-iv-rewrite performance. It is also
easily handles loops of arbitrary structure including multiple exits
and is generally more robust.

This is under a temporary option to avoid affecting default
behavior for the next couple of weeks. It is needed so that I can
checkin unit tests for updateUnloop.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137384 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Analysis/ScalarEvolution.h  |  15 +-
 lib/Analysis/ScalarEvolution.cpp         |  57 ++++++++
 lib/Transforms/Scalar/LoopUnrollPass.cpp |  30 +++-
 test/Transforms/LoopUnroll/scevunroll.ll | 172 +++++++++++++++++++++++
 4 files changed, 266 insertions(+), 8 deletions(-)
 create mode 100644 test/Transforms/LoopUnroll/scevunroll.ll
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index c621bec86a7..6e30b317714 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -507,7 +507,8 @@ namespace llvm {
     /// FoundLHS, and FoundRHS is true.
     bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
                                      const SCEV *LHS, const SCEV *RHS,
-                                     const SCEV *FoundLHS, const SCEV *FoundRHS);
+                                     const SCEV *FoundLHS,
+                                     const SCEV *FoundRHS);
 
     /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
     /// in the header of its containing loop, we know the loop executes a
@@ -710,6 +711,18 @@ namespace llvm {
     bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
                                      const SCEV *LHS, const SCEV *RHS);
 
+    /// getSmallConstantTripCount - Returns the maximum trip count of this loop
+    /// as a normal unsigned value, if possible. Returns 0 if the trip count is
+    /// unknown or not constant.
+    unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitBlock);
+
+    /// getSmallConstantTripMultiple - Returns the largest constant divisor of
+    /// the trip count of this loop as a normal unsigned value, if
+    /// possible. This means that the actual trip count is always a multiple of
+    /// the returned value (don't forget the trip count could very well be zero
+    /// as well!).
+    unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitBlock);
+
     // getExitCount - Get the expression for the number of loop iterations for
     // which this loop is guaranteed not to exit via ExitingBlock. Otherwise
     // return SCEVCouldNotCompute.
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 487bec6a39a..202e715aba3 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -3830,6 +3830,63 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
 //                   Iteration Count Computation Code
 //
 
+/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
+/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
+/// or not constant. Will also return 0 if the maximum trip count is very large
+/// (>= 2^32)
+unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
+                                                    BasicBlock *ExitBlock) {
+  const SCEVConstant *ExitCount =
+    dyn_cast<SCEVConstant>(getExitCount(L, ExitBlock));
+  if (!ExitCount)
+    return 0;
+
+  ConstantInt *ExitConst = ExitCount->getValue();
+
+  // Guard against huge trip counts.
+  if (ExitConst->getValue().getActiveBits() > 32)
+    return 0;
+
+  // In case of integer overflow, this returns 0, which is correct.
+  return ((unsigned)ExitConst->getZExtValue()) + 1;
+}
+
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be the
+/// multiple of a constant (which is also the case if the trip count is simply
+/// constant, use getSmallConstantTripCount for that case), Will also return 1
+/// if the trip count is very large (>= 2^32).
+unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
+                                                       BasicBlock *ExitBlock) {
+  const SCEV *ExitCount = getExitCount(L, ExitBlock);
+  if (ExitCount == getCouldNotCompute())
+    return 1;
+
+  // Get the trip count from the BE count by adding 1.
+  const SCEV *TCMul = getAddExpr(ExitCount,
+                                 getConstant(ExitCount->getType(), 1));
+  // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
+  // to factor simple cases.
+  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
+    TCMul = Mul->getOperand(0);
+
+  const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
+  if (!MulC)
+    return 1;
+
+  ConstantInt *Result = MulC->getValue();
+
+  // Guard against huge trip counts.
+  if (!Result || Result->getValue().getActiveBits() > 32)
+    return 1;
+
+  return (unsigned)Result->getZExtValue();
+}
+
 // getExitCount - Get the expression for the number of loop iterations for which
 // this loop is guaranteed not to exit via ExitintBlock. Otherwise return
 // SCEVCouldNotCompute.
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 94afff6813d..dab3ac42eaf 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -39,6 +39,11 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
   cl::desc("Allows loops to be partially unrolled until "
            "-unroll-threshold loop size is reached."));
 
+// Temporary flag to be made default shortly.
+static cl::opt<bool>
+UnrollWithSCEV("unroll-scev", cl::init(false), cl::Hidden,
+  cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling"));
+
 namespace {
   class LoopUnroll : public LoopPass {
   public:
@@ -121,6 +126,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
 
 bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   LoopInfo *LI = &getAnalysis<LoopInfo>();
+  ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
 
   BasicBlock *Header = L->getHeader();
   DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
@@ -136,14 +142,24 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
       Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
     Threshold = OptSizeUnrollThreshold;
 
-  // Find trip count
-  unsigned TripCount = L->getSmallConstantTripCount();
-
-  // Find trip multiple if count is not available
+  // Find trip count and trip multiple if count is not available
+  unsigned TripCount = 0;
   unsigned TripMultiple = 1;
-  if (TripCount == 0)
-    TripMultiple = L->getSmallConstantTripMultiple();
-
+  if (UnrollWithSCEV) {
+    // Find "latch trip count". UnrollLoop assumes that control cannot exit
+    // via the loop latch on any iteration prior to TripCount. The loop may exit
+    // early via an earlier branch.
+    BasicBlock *LatchBlock = L->getLoopLatch();
+    if (LatchBlock) {
+      TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
+      TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
+    }
+  }
+  else {
+    TripCount = L->getSmallConstantTripCount();
+    if (TripCount == 0)
+      TripMultiple = L->getSmallConstantTripMultiple();
+  }
   // Automatically select an unroll count.
   unsigned Count = CurrentCount;
   if (Count == 0) {
diff --git a/test/Transforms/LoopUnroll/scevunroll.ll b/test/Transforms/LoopUnroll/scevunroll.ll
new file mode 100644
index 00000000000..0f5fbe4e9c2
--- /dev/null
+++ b/test/Transforms/LoopUnroll/scevunroll.ll
@@ -0,0 +1,172 @@
+; RUN: opt < %s -S -indvars -loop-unroll -verify-loop-info -unroll-scev | FileCheck %s
+;
+; Unit tests for loop unrolling using ScalarEvolution to compute trip counts.
+;
+; Indvars is run first to generate an "old" SCEV result. Some unit
+; tests may check that SCEV is properly invalidated between passes.
+
+; Completely unroll loops without a canonical IV.
+;
+; CHECK: @sansCanonical
+; CHECK-NOT: phi
+; CHECK-NOT: icmp
+; CHECK: ret
+define i32 @sansCanonical(i32* %base) nounwind {
+entry:
+  br label %while.body
+
+while.body:
+  %iv = phi i64 [ 10, %entry ], [ %iv.next, %while.body ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.next, %while.body ]
+  %iv.next = add i64 %iv, -1
+  %adr = getelementptr inbounds i32* %base, i64 %iv.next
+  %tmp = load i32* %adr, align 8
+  %sum.next = add i32 %sum, %tmp
+  %iv.narrow = trunc i64 %iv.next to i32
+  %cmp.i65 = icmp sgt i32 %iv.narrow, 0
+  br i1 %cmp.i65, label %while.body, label %exit
+
+exit:
+  ret i32 %sum
+}
+
+; SCEV unrolling properly handles loops with multiple exits. In this
+; case, the computed trip count based on a canonical IV is *not* for a
+; latch block. Canonical unrolling incorrectly unrolls it, but SCEV
+; unrolling does not.
+;
+; CHECK: @earlyLoopTest
+; CHECK: tail:
+; CHECK-NOT: br
+; CHECK: br i1 %cmp2, label %loop, label %exit2
+define i64 @earlyLoopTest(i64* %base) nounwind {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %inc, %tail ]
+  %s = phi i64 [ 0, %entry ], [ %s.next, %tail ]
+  %adr = getelementptr i64* %base, i64 %iv
+  %val = load i64* %adr
+  %s.next = add i64 %s, %val
+  %inc = add i64 %iv, 1
+  %cmp = icmp ne i64 %inc, 4
+  br i1 %cmp, label %tail, label %exit1
+
+tail:
+  %cmp2 = icmp ne i64 %val, 0
+  br i1 %cmp2, label %loop, label %exit2
+
+exit1:
+  ret i64 %s
+
+exit2:
+  ret i64 %s.next
+}
+
+; SCEV properly unrolls multi-exit loops.
+;
+; CHECK: @multiExit
+; CHECK: getelementptr i32* %base, i64 10
+; CHECK-NEXT: load i32*
+; CHECK: br i1 false, label %l2.10, label %exit1
+; CHECK: l2.10:
+; CHECK-NOT: br
+; CHECK: ret i32
+define i32 @multiExit(i32* %base) nounwind {
+entry:
+  br label %l1
+l1:
+  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %l2 ]
+  %iv2 = phi i32 [ 0, %entry ], [ %inc2, %l2 ]
+  %inc1 = add i32 %iv1, 1
+  %inc2 = add i32 %iv2, 1
+  %adr = getelementptr i32* %base, i32 %iv1
+  %val = load i32* %adr
+  %cmp1 = icmp slt i32 %iv1, 5
+  br i1 %cmp1, label %l2, label %exit1
+l2:
+  %cmp2 = icmp slt i32 %iv2, 10
+  br i1 %cmp2, label %l1, label %exit2
+exit1:
+  ret i32 1
+exit2:
+  ret i32 %val
+}
+
+
+; SCEV should not unroll a multi-exit loops unless the latch block has
+; a known trip count, regardless of the early exit trip counts. The
+; LoopUnroll utility uses this assumption to optimize the latch
+; block's branch.
+;
+; CHECK: @multiExit
+; CHECK: l3:
+; CHECK-NOT: br
+; CHECK:   br i1 %cmp3, label %l1, label %exit3
+define i32 @multiExitIncomplete(i32* %base) nounwind {
+entry:
+  br label %l1
+l1:
+  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %l3 ]
+  %iv2 = phi i32 [ 0, %entry ], [ %inc2, %l3 ]
+  %inc1 = add i32 %iv1, 1
+  %inc2 = add i32 %iv2, 1
+  %adr = getelementptr i32* %base, i32 %iv1
+  %val = load i32* %adr
+  %cmp1 = icmp slt i32 %iv1, 5
+  br i1 %cmp1, label %l2, label %exit1
+l2:
+  %cmp2 = icmp slt i32 %iv2, 10
+  br i1 %cmp2, label %l3, label %exit2
+l3:
+  %cmp3 = icmp ne i32 %val, 0
+  br i1 %cmp3, label %l1, label %exit3
+
+exit1:
+  ret i32 1
+exit2:
+  ret i32 2
+exit3:
+  ret i32 3
+}
+
+; When loop unroll merges a loop exit with one of its parent loop's
+; exits, SCEV must forget its ExitNotTaken info.
+;
+; CHECK: @nestedUnroll
+; CHECK-NOT: br i1
+; CHECK: for.body87:
+define void @nestedUnroll() nounwind {
+entry:
+  br label %for.inc
+
+for.inc:
+  br i1 false, label %for.inc, label %for.body38.preheader
+
+for.body38.preheader:
+  br label %for.body38
+
+for.body38:
+  %i.113 = phi i32 [ %inc76, %for.inc74 ], [ 0, %for.body38.preheader ]
+  %mul48 = mul nsw i32 %i.113, 6
+  br label %for.body43
+
+for.body43:
+  %j.011 = phi i32 [ 0, %for.body38 ], [ %inc72, %for.body43 ]
+  %add49 = add nsw i32 %j.011, %mul48
+  %sh_prom50 = zext i32 %add49 to i64
+  %inc72 = add nsw i32 %j.011, 1
+  br i1 false, label %for.body43, label %for.inc74
+
+for.inc74:
+  %inc76 = add nsw i32 %i.113, 1
+  br i1 false, label %for.body38, label %for.body87.preheader
+
+for.body87.preheader:
+  br label %for.body87
+
+for.body87:
+  br label %for.body87
+}
+