IRCE: generalize to handle loops with decreasing induction variables.

IRCE can now split the iteration space for loops like: for (i = n; i >= 0; i--) a[i + k] = 42; // bounds check on access git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230618 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-06 09:44:39 +00:00 · 2015-02-26 08:19:31 +00:00 · 2015-02-26 08:19:31 +00:00 · 21b2edfeae
commit 21b2edfeae
parent 19c267aed1
5 changed files with 429 additions and 223 deletions
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@ -399,6 +399,52 @@ InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI,

 namespace {

+// Keeps track of the structure of a loop.  This is similar to llvm::Loop,
+// except that it is more lightweight and can track the state of a loop through
+// changing and potentially invalid IR.  This structure also formalizes the
+// kinds of loops we can deal with -- ones that have a single latch that is also
+// an exiting block *and* have a canonical induction variable.
+struct LoopStructure {
+  const char *Tag;
+
+  BasicBlock *Header;
+  BasicBlock *Latch;
+
+  // `Latch's terminator instruction is `LatchBr', and it's `LatchBrExitIdx'th
+  // successor is `LatchExit', the exit block of the loop.
+  BranchInst *LatchBr;
+  BasicBlock *LatchExit;
+  unsigned LatchBrExitIdx;
+
+  Value *IndVarNext;
+  Value *IndVarStart;
+  Value *LoopExitAt;
+  bool IndVarIncreasing;
+
+  LoopStructure()
+      : Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr),
+        LatchExit(nullptr), LatchBrExitIdx(-1), IndVarNext(nullptr),
+        IndVarStart(nullptr), LoopExitAt(nullptr), IndVarIncreasing(false) {}
+
+  template <typename M> LoopStructure map(M Map) const {
+    LoopStructure Result;
+    Result.Tag = Tag;
+    Result.Header = cast<BasicBlock>(Map(Header));
+    Result.Latch = cast<BasicBlock>(Map(Latch));
+    Result.LatchBr = cast<BranchInst>(Map(LatchBr));
+    Result.LatchExit = cast<BasicBlock>(Map(LatchExit));
+    Result.LatchBrExitIdx = LatchBrExitIdx;
+    Result.IndVarNext = Map(IndVarNext);
+    Result.IndVarStart = Map(IndVarStart);
+    Result.LoopExitAt = Map(LoopExitAt);
+    Result.IndVarIncreasing = IndVarIncreasing;
+    return Result;
+  }
+
+  static Optional<LoopStructure> parseLoopStructure(ScalarEvolution &, Loop &,
+                                                    const char *&);
+};
+
 /// This class is used to constrain loops to run within a given iteration space.
 /// The algorithm this class implements is given a Loop and a range [Begin,
 /// End).  The algorithm then tries to break out a "main loop" out of the loop
@ -409,51 +455,6 @@ namespace {
 /// iterations in which the induction variable is >= End.
 ///
 class LoopConstrainer {
-
-  // Keeps track of the structure of a loop.  This is similar to llvm::Loop,
-  // except that it is more lightweight and can track the state of a loop
-  // through changing and potentially invalid IR.  This structure also
-  // formalizes the kinds of loops we can deal with -- ones that have a single
-  // latch that is also an exiting block *and* have a canonical induction
-  // variable.
-  struct LoopStructure {
-    const char *Tag;
-
-    BasicBlock *Header;
-    BasicBlock *Latch;
-
-    // `Latch's terminator instruction is `LatchBr', and it's `LatchBrExitIdx'th
-    // successor is `LatchExit', the exit block of the loop.
-    BranchInst *LatchBr;
-    BasicBlock *LatchExit;
-    unsigned LatchBrExitIdx;
-
-    // The canonical induction variable.  It's value is `CIVStart` on the 0th
-    // itertion and `CIVNext` for all iterations after that.
-    PHINode *CIV;
-    Value *CIVStart;
-    Value *CIVNext;
-
-    LoopStructure() : Tag(""), Header(nullptr), Latch(nullptr),
-                      LatchBr(nullptr), LatchExit(nullptr),
-                      LatchBrExitIdx(-1), CIV(nullptr),
-                      CIVStart(nullptr), CIVNext(nullptr) { }
-
-    template <typename M> LoopStructure map(M Map) const {
-      LoopStructure Result;
-      Result.Tag = Tag;
-      Result.Header = cast<BasicBlock>(Map(Header));
-      Result.Latch = cast<BasicBlock>(Map(Latch));
-      Result.LatchBr = cast<BranchInst>(Map(LatchBr));
-      Result.LatchExit = cast<BasicBlock>(Map(LatchExit));
-      Result.LatchBrExitIdx = LatchBrExitIdx;
-      Result.CIV = cast<PHINode>(Map(CIV));
-      Result.CIVNext = Map(CIVNext);
-      Result.CIVStart = Map(CIVStart);
-      return Result;
-    }
-  };
-
  // The representation of a clone of the original loop we started out with.
  struct ClonedLoop {
    // The cloned blocks
@ -472,17 +473,22 @@ class LoopConstrainer {
    BasicBlock *PseudoExit;
    BasicBlock *ExitSelector;
    std::vector<PHINode *> PHIValuesAtPseudoExit;
+    PHINode *IndVarEnd;

-    RewrittenRangeInfo() : PseudoExit(nullptr), ExitSelector(nullptr) { }
+    RewrittenRangeInfo()
+        : PseudoExit(nullptr), ExitSelector(nullptr), IndVarEnd(nullptr) {}
  };

  // Calculated subranges we restrict the iteration space of the main loop to.
  // See the implementation of `calculateSubRanges' for more details on how
-  // these fields are computed.  `ExitPreLoopAt' is `None' if we don't need a
-  // pre loop.  `ExitMainLoopAt' is `None' if we don't need a post loop.
+  // these fields are computed.  `LowLimit` is None if there is no restriction
+  // on low end of the restricted iteration space of the main loop.  `HighLimit`
+  // is None if there is no restriction on high end of the restricted iteration
+  // space of the main loop.
+
  struct SubRanges {
-    Optional<Value *> ExitPreLoopAt;
-    Optional<Value *> ExitMainLoopAt;
+    Optional<const SCEV *> LowLimit;
+    Optional<const SCEV *> HighLimit;
  };

  // A utility function that does a `replaceUsesOfWith' on the incoming block
@ -491,19 +497,11 @@ class LoopConstrainer {
  static void replacePHIBlock(PHINode *PN, BasicBlock *Block,
                              BasicBlock *ReplaceBy);

-  // Try to "parse" `OriginalLoop' and populate the various out parameters.
-  // Returns true on success, false on failure.
-  //
-  bool recognizeLoop(LoopStructure &LoopStructureOut,
-                     const SCEV *&LatchCountOut, BasicBlock *&PreHeaderOut,
-                     const char *&FailureReasonOut) const;
-
  // Compute a safe set of limits for the main loop to run in -- effectively the
  // intersection of `Range' and the iteration space of the original loop.
-  // Return the header count (1 + the latch taken count) in `HeaderCount'.
  // Return None if unable to compute the set of subranges.
  //
-  Optional<SubRanges> calculateSubRanges(Value *&HeaderCount) const;
+  Optional<SubRanges> calculateSubRanges() const;

  // Clone `OriginalLoop' and return the result in CLResult.  The IR after
  // running `cloneLoop' is well formed except for the PHI nodes in CLResult --
@ -542,16 +540,15 @@ class LoopConstrainer {
  // The loop denoted by `LS' has `OldPreheader' as its preheader.  This
  // function creates a new preheader for `LS' and returns it.
  //
-  BasicBlock *createPreheader(const LoopConstrainer::LoopStructure &LS,
-                              BasicBlock *OldPreheader, const char *Tag) const;
+  BasicBlock *createPreheader(const LoopStructure &LS, BasicBlock *OldPreheader,
+                              const char *Tag) const;

  // `ContinuationBlockAndPreheader' was the continuation block for some call to
  // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'.
  // This function rewrites the PHI nodes in `LS.Header' to start with the
  // correct value.
  void rewriteIncomingValuesForPHIs(
-      LoopConstrainer::LoopStructure &LS,
-      BasicBlock *ContinuationBlockAndPreheader,
+      LoopStructure &LS, BasicBlock *ContinuationBlockAndPreheader,
      const LoopConstrainer::RewrittenRangeInfo &RRI) const;

  // Even though we do not preserve any passes at this time, we at least need to
@ -570,7 +567,6 @@ class LoopConstrainer {
  LoopInfo &OriginalLoopInfo;
  const SCEV *LatchTakenCount;
  BasicBlock *OriginalPreheader;
-  Value *OriginalHeaderCount;

  // The preheader of the main loop.  This may or may not be different from
  // `OriginalPreheader'.
@ -584,12 +580,12 @@ class LoopConstrainer {
  LoopStructure MainLoopStructure;

 public:
-  LoopConstrainer(Loop &L, LoopInfo &LI, ScalarEvolution &SE,
-                  InductiveRangeCheck::Range R)
-    : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()), SE(SE),
-      OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr),
-      OriginalPreheader(nullptr), OriginalHeaderCount(nullptr),
-      MainLoopPreheader(nullptr), Range(R) { }
+  LoopConstrainer(Loop &L, LoopInfo &LI, const LoopStructure &LS,
+                  ScalarEvolution &SE, InductiveRangeCheck::Range R)
+      : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()),
+        SE(SE), OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr),
+        OriginalPreheader(nullptr), MainLoopPreheader(nullptr), Range(R),
+        MainLoopStructure(LS) {}

  // Entry point for the algorithm.  Returns true on success.
  bool run();
@ -604,155 +600,246 @@ void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
      PN->setIncomingBlock(i, ReplaceBy);
 }

-bool LoopConstrainer::recognizeLoop(LoopStructure &LoopStructureOut,
-                                    const SCEV *&LatchCountOut,
-                                    BasicBlock *&PreheaderOut,
-                                    const char *&FailureReason) const {
-  using namespace llvm::PatternMatch;
+static bool CanBeSMax(ScalarEvolution &SE, const SCEV *S) {
+  APInt SMax =
+      APInt::getSignedMaxValue(cast<IntegerType>(S->getType())->getBitWidth());
+  return SE.getSignedRange(S).contains(SMax) &&
+         SE.getUnsignedRange(S).contains(SMax);
+}

-  assert(OriginalLoop.isLoopSimplifyForm() &&
-         "should follow from addRequired<>");
+static bool CanBeSMin(ScalarEvolution &SE, const SCEV *S) {
+  APInt SMin =
+      APInt::getSignedMinValue(cast<IntegerType>(S->getType())->getBitWidth());
+  return SE.getSignedRange(S).contains(SMin) &&
+         SE.getUnsignedRange(S).contains(SMin);
+}

-  BasicBlock *Latch = OriginalLoop.getLoopLatch();
-  if (!OriginalLoop.isLoopExiting(Latch)) {
+Optional<LoopStructure>
+LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
+                                  const char *&FailureReason) {
+  assert(L.isLoopSimplifyForm() && "should follow from addRequired<>");
+
+  BasicBlock *Latch = L.getLoopLatch();
+  if (!L.isLoopExiting(Latch)) {
    FailureReason = "no loop latch";
-    return false;
+    return None;
  }

-  PHINode *CIV = OriginalLoop.getCanonicalInductionVariable();
-  assert(CIV && "precondition");
-
-  BasicBlock *Header = OriginalLoop.getHeader();
-  BasicBlock *Preheader = OriginalLoop.getLoopPreheader();
+  BasicBlock *Header = L.getHeader();
+  BasicBlock *Preheader = L.getLoopPreheader();
  if (!Preheader) {
    FailureReason = "no preheader";
-    return false;
+    return None;
  }

-  Value *CIVNext = CIV->getIncomingValueForBlock(Latch);
-  Value *CIVStart = CIV->getIncomingValueForBlock(Preheader);
-
-  const SCEV *LatchCount = SE.getExitCount(&OriginalLoop, Latch);
-  if (isa<SCEVCouldNotCompute>(LatchCount)) {
-    FailureReason = "could not compute latch count";
-    return false;
-  }
-
-  // While SCEV does most of the analysis for us, we still have to
-  // modify the latch; and currently we can only deal with certain
-  // kinds of latches.  This can be made more sophisticated as needed.
-
  BranchInst *LatchBr = dyn_cast<BranchInst>(&*Latch->rbegin());
-
  if (!LatchBr || LatchBr->isUnconditional()) {
    FailureReason = "latch terminator not conditional branch";
-    return false;
+    return None;
  }

-  // Currently we only support a latch condition of the form:
-  //
-  //  %condition = icmp slt %civNext, %limit
-  //  br i1 %condition, label %header, label %exit
+  unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0;

-  if (LatchBr->getSuccessor(0) != Header) {
-    FailureReason = "unknown latch form (header not first successor)";
-    return false;
+  ICmpInst *ICI = dyn_cast<ICmpInst>(LatchBr->getCondition());
+  if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) {
+    FailureReason = "latch terminator branch not conditional on integral icmp";
+    return None;
  }

-  Value *CIVComparedTo = nullptr;
-  ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
-  if (!(match(LatchBr->getCondition(),
-              m_ICmp(Pred, m_Specific(CIVNext), m_Value(CIVComparedTo))) &&
-        Pred == ICmpInst::ICMP_SLT)) {
-    FailureReason = "unknown latch form (not slt)";
-    return false;
+  const SCEV *LatchCount = SE.getExitCount(&L, Latch);
+  if (isa<SCEVCouldNotCompute>(LatchCount)) {
+    FailureReason = "could not compute latch count";
+    return None;
  }

-  // IndVarSimplify will sometimes leave behind (in SCEV's cache) backedge-taken
-  // counts that are narrower than the canonical induction variable.  These
-  // values are still accurate, and we could probably use them after sign/zero
-  // extension; but for now we just bail out of the transformation to keep
-  // things simple.
-  const SCEV *CIVComparedToSCEV = SE.getSCEV(CIVComparedTo);
-  if (isa<SCEVCouldNotCompute>(CIVComparedToSCEV) ||
-      CIVComparedToSCEV->getType() != LatchCount->getType()) {
-    FailureReason = "could not relate CIV to latch expression";
-    return false;
+  ICmpInst::Predicate Pred = ICI->getPredicate();
+  Value *LeftValue = ICI->getOperand(0);
+  const SCEV *LeftSCEV = SE.getSCEV(LeftValue);
+  IntegerType *IndVarTy = cast<IntegerType>(LeftValue->getType());
+
+  Value *RightValue = ICI->getOperand(1);
+  const SCEV *RightSCEV = SE.getSCEV(RightValue);
+
+  // We canonicalize `ICI` such that `LeftSCEV` is an add recurrence.
+  if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
+    if (isa<SCEVAddRecExpr>(RightSCEV)) {
+      std::swap(LeftSCEV, RightSCEV);
+      std::swap(LeftValue, RightValue);
+      Pred = ICmpInst::getSwappedPredicate(Pred);
+    } else {
+      FailureReason = "no add recurrences in the icmp";
+      return None;
+    }
  }

-  const SCEV *ShouldBeOne = SE.getMinusSCEV(CIVComparedToSCEV, LatchCount);
-  const SCEVConstant *SCEVOne = dyn_cast<SCEVConstant>(ShouldBeOne);
-  if (!SCEVOne || SCEVOne->getValue()->getValue() != 1) {
-    FailureReason = "unexpected header count in latch";
+  auto IsInductionVar = [&SE](const SCEVAddRecExpr *AR, bool &IsIncreasing) {
+    if (!AR->isAffine())
+      return false;
+
+    IntegerType *Ty = cast<IntegerType>(AR->getType());
+    IntegerType *WideTy =
+        IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2);
+
+    // Currently we only work with induction variables that have been proved to
+    // not wrap.  This restriction can potentially be lifted in the future.
+
+    const SCEVAddRecExpr *ExtendAfterOp =
+        dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
+    if (!ExtendAfterOp)
+      return false;
+
+    const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy);
+    const SCEV *ExtendedStep =
+        SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy);
+
+    bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart &&
+                        ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep;
+
+    if (!NoSignedWrap)
+      return false;
+
+    if (const SCEVConstant *StepExpr =
+            dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE))) {
+      ConstantInt *StepCI = StepExpr->getValue();
+      if (StepCI->isOne() || StepCI->isMinusOne()) {
+        IsIncreasing = StepCI->isOne();
+        return true;
+      }
+    }
+
    return false;
+  };
+
+  // `ICI` is interpreted as taking the backedge if the *next* value of the
+  // induction variable satisfies some constraint.
+
+  const SCEVAddRecExpr *IndVarNext = cast<SCEVAddRecExpr>(LeftSCEV);
+  bool IsIncreasing = false;
+  if (!IsInductionVar(IndVarNext, IsIncreasing)) {
+    FailureReason = "LHS in icmp not induction variable";
+    return None;
  }

-  unsigned LatchBrExitIdx = 1;
+  ConstantInt *One = ConstantInt::get(IndVarTy, 1);
+  // TODO: generalize the predicates here to also match their unsigned variants.
+  if (IsIncreasing) {
+    bool FoundExpectedPred =
+        (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 1) ||
+        (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 0);
+
+    if (!FoundExpectedPred) {
+      FailureReason = "expected icmp slt semantically, found something else";
+      return None;
+    }
+
+    if (LatchBrExitIdx == 0) {
+      if (CanBeSMax(SE, RightSCEV)) {
+        // TODO: this restriction is easily removable -- we just have to
+        // remember that the icmp was an slt and not an sle.
+        FailureReason = "limit may overflow when coercing sle to slt";
+        return None;
+      }
+
+      IRBuilder<> B(&*Preheader->rbegin());
+      RightValue = B.CreateAdd(RightValue, One);
+    }
+
+  } else {
+    bool FoundExpectedPred =
+        (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) ||
+        (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 0);
+
+    if (!FoundExpectedPred) {
+      FailureReason = "expected icmp sgt semantically, found something else";
+      return None;
+    }
+
+    if (LatchBrExitIdx == 0) {
+      if (CanBeSMin(SE, RightSCEV)) {
+        // TODO: this restriction is easily removable -- we just have to
+        // remember that the icmp was an sgt and not an sge.
+        FailureReason = "limit may overflow when coercing sge to sgt";
+        return None;
+      }
+
+      IRBuilder<> B(&*Preheader->rbegin());
+      RightValue = B.CreateSub(RightValue, One);
+    }
+  }
+
+  const SCEV *StartNext = IndVarNext->getStart();
+  const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE));
+  const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
+
  BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);

-  assert(SE.getLoopDisposition(LatchCount, &OriginalLoop) ==
+  assert(SE.getLoopDisposition(LatchCount, &L) ==
             ScalarEvolution::LoopInvariant &&
         "loop variant exit count doesn't make sense!");

-  assert(!OriginalLoop.contains(LatchExit) && "expected an exit block!");
+  assert(!L.contains(LatchExit) && "expected an exit block!");

-  LoopStructureOut.Tag = "main";
-  LoopStructureOut.Header = Header;
-  LoopStructureOut.Latch = Latch;
-  LoopStructureOut.LatchBr = LatchBr;
-  LoopStructureOut.LatchExit = LatchExit;
-  LoopStructureOut.LatchBrExitIdx = LatchBrExitIdx;
-  LoopStructureOut.CIV = CIV;
-  LoopStructureOut.CIVNext = CIVNext;
-  LoopStructureOut.CIVStart = CIVStart;
+  Value *IndVarStartV = SCEVExpander(SE, "irce").expandCodeFor(
+      IndVarStart, IndVarTy, &*Preheader->rbegin());
+  IndVarStartV->setName("indvar.start");
+
+  LoopStructure Result;
+
+  Result.Tag = "main";
+  Result.Header = Header;
+  Result.Latch = Latch;
+  Result.LatchBr = LatchBr;
+  Result.LatchExit = LatchExit;
+  Result.LatchBrExitIdx = LatchBrExitIdx;
+  Result.IndVarStart = IndVarStartV;
+  Result.IndVarNext = LeftValue;
+  Result.IndVarIncreasing = IsIncreasing;
+  Result.LoopExitAt = RightValue;

-  LatchCountOut = LatchCount;
-  PreheaderOut = Preheader;
  FailureReason = nullptr;

-  return true;
+  return Result;
 }

 Optional<LoopConstrainer::SubRanges>
-LoopConstrainer::calculateSubRanges(Value *&HeaderCountOut) const {
+LoopConstrainer::calculateSubRanges() const {
  IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());

  if (Range.getType() != Ty)
    return None;

-  SCEVExpander Expander(SE, "irce");
-  Instruction *InsertPt = OriginalPreheader->getTerminator();
-
  LoopConstrainer::SubRanges Result;

  // I think we can be more aggressive here and make this nuw / nsw if the
  // addition that feeds into the icmp for the latch's terminating branch is nuw
  // / nsw.  In any case, a wrapping 2's complement addition is safe.
  ConstantInt *One = ConstantInt::get(Ty, 1);
-  const SCEV *HeaderCountSCEV = SE.getAddExpr(LatchTakenCount, SE.getSCEV(One));
-  HeaderCountOut = Expander.expandCodeFor(HeaderCountSCEV, Ty, InsertPt);
+  const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart);
+  const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt);

-  const SCEV *Zero = SE.getConstant(Ty, 0);
+  bool Increasing = MainLoopStructure.IndVarIncreasing;
+  // We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the
+  // range of values the induction variable takes.
+  const SCEV *Smallest =
+      Increasing ? Start : SE.getAddExpr(End, SE.getSCEV(One));
+  const SCEV *Greatest =
+      Increasing ? End : SE.getAddExpr(Start, SE.getSCEV(One));
+
+  auto Clamp = [this, Smallest, Greatest](const SCEV *S) {
+    return SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S));
+  };

  // In some cases we can prove that we don't need a pre or post loop

  bool ProvablyNoPreloop =
-    SE.isKnownPredicate(ICmpInst::ICMP_SLE, Range.getBegin(), Zero);
-  if (!ProvablyNoPreloop) {
-    const SCEV *ExitPreLoopAtSCEV =
-      SE.getSMinExpr(HeaderCountSCEV, Range.getBegin());
-    Result.ExitPreLoopAt =
-      Expander.expandCodeFor(ExitPreLoopAtSCEV, Ty, InsertPt);
-  }
+      SE.isKnownPredicate(ICmpInst::ICMP_SLE, Range.getBegin(), Smallest);
+  if (!ProvablyNoPreloop)
+    Result.LowLimit = Clamp(Range.getBegin());

  bool ProvablyNoPostLoop =
-    SE.isKnownPredicate(ICmpInst::ICMP_SLE, HeaderCountSCEV, Range.getEnd());
-  if (!ProvablyNoPostLoop) {
-    const SCEV *ExitMainLoopAtSCEV =
-      SE.getSMinExpr(HeaderCountSCEV, Range.getEnd());
-    Result.ExitMainLoopAt =
-      Expander.expandCodeFor(ExitMainLoopAtSCEV, Ty, InsertPt);
-  }
+      SE.isKnownPredicate(ICmpInst::ICMP_SLE, Greatest, Range.getEnd());
+  if (!ProvablyNoPostLoop)
+    Result.HighLimit = Clamp(Range.getEnd());

  return Result;
 }
@ -809,7 +896,7 @@ void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
 }

 LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
-    const LoopStructure &LS, BasicBlock *Preheader, Value *ExitLoopAt,
+    const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt,
    BasicBlock *ContinuationBlock) const {

  // We start with a loop with a single latch:
@ -893,32 +980,37 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
                                      BBInsertLocation);

  BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin());
+  bool Increasing = LS.IndVarIncreasing;

  IRBuilder<> B(PreheaderJump);

  // EnterLoopCond - is it okay to start executing this `LS'?
-  Value *EnterLoopCond = B.CreateICmpSLT(LS.CIVStart, ExitLoopAt);
+  Value *EnterLoopCond = Increasing
+                             ? B.CreateICmpSLT(LS.IndVarStart, ExitSubloopAt)
+                             : B.CreateICmpSGT(LS.IndVarStart, ExitSubloopAt);
+
  B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
  PreheaderJump->eraseFromParent();

-  assert(LS.LatchBrExitIdx == 1 && "generalize this as needed!");
-
-  B.SetInsertPoint(LS.LatchBr);
-
-  // ContinueCond - is it okay to execute the next iteration in `LS'?
-  Value *ContinueCond = B.CreateICmpSLT(LS.CIVNext, ExitLoopAt);
-
-  LS.LatchBr->setCondition(ContinueCond);
-  assert(LS.LatchBr->getSuccessor(LS.LatchBrExitIdx) == LS.LatchExit &&
-         "invariant!");
  LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
+  B.SetInsertPoint(LS.LatchBr);
+  Value *TakeBackedgeLoopCond =
+      Increasing ? B.CreateICmpSLT(LS.IndVarNext, ExitSubloopAt)
+                 : B.CreateICmpSGT(LS.IndVarNext, ExitSubloopAt);
+  Value *CondForBranch = LS.LatchBrExitIdx == 1
+                             ? TakeBackedgeLoopCond
+                             : B.CreateNot(TakeBackedgeLoopCond);
+
+  LS.LatchBr->setCondition(CondForBranch);

  B.SetInsertPoint(RRI.ExitSelector);

  // IterationsLeft - are there any more iterations left, given the original
  // upper bound on the induction variable?  If not, we branch to the "real"
  // exit.
-  Value *IterationsLeft = B.CreateICmpSLT(LS.CIVNext, OriginalHeaderCount);
+  Value *IterationsLeft = Increasing
+                              ? B.CreateICmpSLT(LS.IndVarNext, LS.LoopExitAt)
+                              : B.CreateICmpSGT(LS.IndVarNext, LS.LoopExitAt);
  B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);

  BranchInst *BranchToContinuation =
@ -942,6 +1034,11 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
    RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
  }

+  RRI.IndVarEnd = PHINode::Create(LS.IndVarNext->getType(), 2, "indvar.end",
+                                  BranchToContinuation);
+  RRI.IndVarEnd->addIncoming(LS.IndVarStart, Preheader);
+  RRI.IndVarEnd->addIncoming(LS.IndVarNext, RRI.ExitSelector);
+
  // The latch exit now has a branch from `RRI.ExitSelector' instead of
  // `LS.Latch'.  The PHI nodes need to be updated to reflect that.
  for (Instruction &I : *LS.LatchExit) {
@ -955,7 +1052,7 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
 }

 void LoopConstrainer::rewriteIncomingValuesForPHIs(
-    LoopConstrainer::LoopStructure &LS, BasicBlock *ContinuationBlock,
+    LoopStructure &LS, BasicBlock *ContinuationBlock,
    const LoopConstrainer::RewrittenRangeInfo &RRI) const {

  unsigned PHIIndex = 0;
@ -970,13 +1067,12 @@ void LoopConstrainer::rewriteIncomingValuesForPHIs(
        PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
  }

-  LS.CIVStart = LS.CIV->getIncomingValueForBlock(ContinuationBlock);
+  LS.IndVarStart = RRI.IndVarEnd;
 }

-BasicBlock *
-LoopConstrainer::createPreheader(const LoopConstrainer::LoopStructure &LS,
-                                 BasicBlock *OldPreheader,
-                                 const char *Tag) const {
+BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
+                                             BasicBlock *OldPreheader,
+                                             const char *Tag) const {

  BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
  BranchInst::Create(LS.Header, Preheader);
@ -1004,30 +1100,79 @@ void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) {

 bool LoopConstrainer::run() {
  BasicBlock *Preheader = nullptr;
-  const char *CouldNotProceedBecause = nullptr;
-  if (!recognizeLoop(MainLoopStructure, LatchTakenCount, Preheader,
-                     CouldNotProceedBecause)) {
-    DEBUG(dbgs() << "irce: could not recognize loop, " << CouldNotProceedBecause
-                 << "\n";);
-    return false;
-  }
+  LatchTakenCount = SE.getExitCount(&OriginalLoop, MainLoopStructure.Latch);
+  Preheader = OriginalLoop.getLoopPreheader();
+  assert(!isa<SCEVCouldNotCompute>(LatchTakenCount) && Preheader != nullptr &&
+         "preconditions!");

  OriginalPreheader = Preheader;
  MainLoopPreheader = Preheader;

-  Optional<SubRanges> MaybeSR = calculateSubRanges(OriginalHeaderCount);
+  Optional<SubRanges> MaybeSR = calculateSubRanges();
  if (!MaybeSR.hasValue()) {
    DEBUG(dbgs() << "irce: could not compute subranges\n");
    return false;
  }
+
  SubRanges SR = MaybeSR.getValue();
+  bool Increasing = MainLoopStructure.IndVarIncreasing;
+  IntegerType *IVTy =
+      cast<IntegerType>(MainLoopStructure.IndVarNext->getType());
+
+  SCEVExpander Expander(SE, "irce");
+  Instruction *InsertPt = OriginalPreheader->getTerminator();

  // It would have been better to make `PreLoop' and `PostLoop'
  // `Optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy
  // constructor.
  ClonedLoop PreLoop, PostLoop;
-  bool NeedsPreLoop = SR.ExitPreLoopAt.hasValue();
-  bool NeedsPostLoop = SR.ExitMainLoopAt.hasValue();
+  bool NeedsPreLoop =
+      Increasing ? SR.LowLimit.hasValue() : SR.HighLimit.hasValue();
+  bool NeedsPostLoop =
+      Increasing ? SR.HighLimit.hasValue() : SR.LowLimit.hasValue();
+
+  Value *ExitPreLoopAt = nullptr;
+  Value *ExitMainLoopAt = nullptr;
+  const SCEVConstant *MinusOneS =
+      cast<SCEVConstant>(SE.getConstant(IVTy, -1, true /* isSigned */));
+
+  if (NeedsPreLoop) {
+    const SCEV *ExitPreLoopAtSCEV = nullptr;
+
+    if (Increasing)
+      ExitPreLoopAtSCEV = *SR.LowLimit;
+    else {
+      if (CanBeSMin(SE, *SR.HighLimit)) {
+        DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
+                     << "preloop exit limit.  HighLimit = " << *(*SR.HighLimit)
+                     << "\n");
+        return false;
+      }
+      ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
+    }
+
+    ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt);
+    ExitPreLoopAt->setName("exit.preloop.at");
+  }
+
+  if (NeedsPostLoop) {
+    const SCEV *ExitMainLoopAtSCEV = nullptr;
+
+    if (Increasing)
+      ExitMainLoopAtSCEV = *SR.HighLimit;
+    else {
+      if (CanBeSMin(SE, *SR.LowLimit)) {
+        DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
+                     << "mainloop exit limit.  LowLimit = " << *(*SR.LowLimit)
+                     << "\n");
+        return false;
+      }
+      ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
+    }
+
+    ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt);
+    ExitMainLoopAt->setName("exit.mainloop.at");
+  }

  // We clone these ahead of time so that we don't have to deal with changing
  // and temporarily invalid IR as we transform the loops.
@ -1044,9 +1189,8 @@ bool LoopConstrainer::run() {

    MainLoopPreheader =
        createPreheader(MainLoopStructure, Preheader, "mainloop");
-    PreLoopRRI =
-        changeIterationSpaceEnd(PreLoop.Structure, Preheader,
-                                SR.ExitPreLoopAt.getValue(), MainLoopPreheader);
+    PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader,
+                                         ExitPreLoopAt, MainLoopPreheader);
    rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
                                 PreLoopRRI);
  }
@ -1058,8 +1202,7 @@ bool LoopConstrainer::run() {
    PostLoopPreheader =
        createPreheader(PostLoop.Structure, Preheader, "postloop");
    PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
-                                          SR.ExitMainLoopAt.getValue(),
-                                          PostLoopPreheader);
+                                          ExitMainLoopAt, PostLoopPreheader);
    rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
                                 PostLoopRRI);
  }
@ -1179,13 +1322,6 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
  ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
  BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();

-  PHINode *CIV = L->getCanonicalInductionVariable();
-  if (!CIV) {
-    DEBUG(dbgs() << "irce: loop has no canonical induction variable\n");
-    return false;
-  }
-  const SCEVAddRecExpr *IndVar = cast<SCEVAddRecExpr>(SE.getSCEV(CIV));
-
  for (auto BBI : L->getBlocks())
    if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
      if (InductiveRangeCheck *IRC =
@ -1202,6 +1338,21 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
          IRC->print(dbgs());
    );

+  const char *FailureReason = nullptr;
+  Optional<LoopStructure> MaybeLoopStructure =
+      LoopStructure::parseLoopStructure(SE, *L, FailureReason);
+  if (!MaybeLoopStructure.hasValue()) {
+    DEBUG(dbgs() << "irce: could not parse loop structure: " << FailureReason
+                 << "\n";);
+    return false;
+  }
+  LoopStructure LS = MaybeLoopStructure.getValue();
+  bool Increasing = LS.IndVarIncreasing;
+  const SCEV *MinusOne =
+      SE.getConstant(LS.IndVarNext->getType(), Increasing ? -1 : 1, true);
+  const SCEVAddRecExpr *IndVar =
+      cast<SCEVAddRecExpr>(SE.getAddExpr(SE.getSCEV(LS.IndVarNext), MinusOne));
+
  Optional<InductiveRangeCheck::Range> SafeIterRange;
  Instruction *ExprInsertPt = Preheader->getTerminator();

@ -1223,8 +1374,8 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
  if (!SafeIterRange.hasValue())
    return false;

-  LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), SE,
-                     SafeIterRange.getValue());
+  LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), LS,
+                     SE, SafeIterRange.getValue());
  bool Changed = LC.run();

  if (Changed) {
--- a/test/Transforms/IRCE/decrementing-loop.ll
+++ b/test/Transforms/IRCE/decrementing-loop.ll
@ -0,0 +1,43 @@
+; RUN: opt -irce -S < %s | FileCheck %s
+
+define void @decrementing_loop(i32 *%arr, i32 *%a_len_ptr, i32 %n) {
+ entry:
+  %len = load i32* %a_len_ptr, !range !0
+  %first.itr.check = icmp sgt i32 %n, 0
+  %start = sub i32 %n, 1
+  br i1 %first.itr.check, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ %start, %entry ] , [ %idx.dec, %in.bounds ]
+  %idx.dec = sub i32 %idx, 1
+  %abc.high = icmp slt i32 %idx, %len
+  %abc.low = icmp sge i32 %idx, 0
+  %abc = and i1 %abc.low, %abc.high
+  br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+
+ in.bounds:
+  %addr = getelementptr i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp sgt i32 %idx.dec, -1
+  br i1 %next, label %loop, label %exit
+
+ out.of.bounds:
+  ret void
+
+ exit:
+  ret void
+
+; CHECK: loop.preheader:
+; CHECK:   [[indvar_start:[^ ]+]] = add i32 %n, -1
+; CHECK:   [[not_len:[^ ]+]] = sub i32 -1, %len
+; CHECK:   [[not_n:[^ ]+]] = sub i32 -1, %n
+; CHECK:   [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]]
+; CHECK:   [[not_len_hiclamp:[^ ]+]] = select i1 [[not_len_hiclamp_cmp]], i32 [[not_len]], i32 [[not_n]]
+; CHECK:   [[len_hiclamp:[^ ]+]] = sub i32 -1, [[not_len_hiclamp]]
+; CHECK:   [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 [[len_hiclamp]], 0
+; CHECK:   [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 [[len_hiclamp]], i32 0
+; CHECK:   %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1
+}
+
+!0 = !{i32 0, i32 2147483647}
+!1 = !{!"branch_weights", i32 64, i32 4}
--- a/test/Transforms/IRCE/multiple-access-no-preloop.ll
+++ b/test/Transforms/IRCE/multiple-access-no-preloop.ll
@ -42,9 +42,11 @@ define void @multiple_access_no_preloop(
 ; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]]
 ; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]]
 ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
-; CHECK: [[not_upper_limit_cond:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]]
-; CHECK: [[not_upper_limit:[^ ]+]] = select i1 [[not_upper_limit_cond]], i32 [[smax_not_len]], i32 [[not_n]]
-; CHECK: [[upper_limit:[^ ]+]] = sub i32 -1, [[not_upper_limit]]
+; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]]
+; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]]
+; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]]
+; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0
+; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0

 ; CHECK-LABEL: loop:
 ; CHECK: br i1 true, label %in.bounds.a, label %out.of.bounds
--- a/test/Transforms/IRCE/single-access-no-preloop.ll
+++ b/test/Transforms/IRCE/single-access-no-preloop.ll
@ -36,6 +36,7 @@ define void @single_access_no_preloop_no_offset(i32 *%arr, i32 *%a_len_ptr, i32

 ; CHECK-LABEL: main.pseudo.exit:
 ; CHECK-NEXT: %idx.copy = phi i32 [ 0, %loop.preheader ], [ %idx.next, %main.exit.selector ]
+; CHECK-NEXT: %indvar.end = phi i32 [ 0, %loop.preheader ], [ %idx.next, %main.exit.selector ]
 ; CHECK-NEXT: br label %postloop

 ; CHECK-LABEL: postloop:
@ -85,17 +86,19 @@ define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i3
 ; CHECK-LABEL: loop.preheader:
 ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
 ; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
-; CHECK: [[not_exit_main_loop_at_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]]
-; CHECK: [[not_exit_main_loop_at:[^ ]+]] = select i1 [[not_exit_main_loop_at_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]]
-; CHECK: [[exit_main_loop_at:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at]]
-; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at]]
+; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]]
+; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]]
+; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]]
+; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
+; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0
+; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]]
 ; CHECK: br i1 [[enter_main_loop]], label %loop, label %main.pseudo.exit

 ; CHECK-LABEL: loop:
 ; CHECK: br i1 true, label %in.bounds, label %out.of.bounds

 ; CHECK-LABEL: in.bounds:
-; CHECK: [[continue_main_loop:[^ ]+]] = icmp slt i32 %idx.next, [[exit_main_loop_at]]
+; CHECK: [[continue_main_loop:[^ ]+]] = icmp slt i32 %idx.next, [[exit_main_loop_at_loclamp]]
 ; CHECK: br i1 [[continue_main_loop]], label %loop, label %main.exit.selector

 ; CHECK-LABEL: main.pseudo.exit:
--- a/test/Transforms/IRCE/single-access-with-preloop.ll
+++ b/test/Transforms/IRCE/single-access-with-preloop.ll
@ -31,14 +31,21 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32
 ; CHECK-LABEL: loop.preheader:
 ; CHECK: [[not_safe_start:[^ ]+]] = add i32 %offset, -1
 ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
-; CHECK: [[not_exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]]
-; CHECK: [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cond]], i32 [[not_safe_start]], i32 [[not_n]]
-; CHECK: [[exit_preloop_at:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at]]
+; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]]
+; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], i32 [[not_safe_start]], i32 [[not_n]]
+; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at_loclamp]]
+; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[exit_preloop_at_loclamp]], 0
+; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0
+
+
+; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 %offset, -1
+; CHECK: [[not_safe_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len
+; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_end]], [[not_n]]
+; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_end]], i32 [[not_n]]
+; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_loclamp]]
+; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_loclamp]], 0
+; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 [[exit_mainloop_at_loclamp]], i32 0

-; CHECK: [[not_safe_end:[^ ]+]] = sub i32 [[not_safe_start]], %len
-; CHECK: [[not_exit_mainloop_at_cond:[^ ]+]] = icmp sgt i32 [[not_safe_end]], [[not_n]]
-; CHECK: [[not_exit_mainloop_at:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond]], i32 [[not_safe_end]], i32 [[not_n]]
-; CHECK: [[exit_mainloop_at:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at]]

 ; CHECK-LABEL: in.bounds:
 ; CHECK: [[continue_mainloop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[exit_mainloop_at]]