indvars: fixed IV cloning in -disable-iv-rewrite mode with associated

cleanup and overdue test cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@132038 91177308-0d34-0410-b5e6-96231b3b80d8
2025-08-15 22:28:18 +00:00 · 2011-05-25 04:42:22 +00:00
parent 47268164f3
commit 03d3d3b361
3 changed files with 330 additions and 52 deletions
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -67,6 +67,9 @@ STATISTIC(NumWidened , "Number of indvars widened");
 STATISTIC(NumInserted, "Number of canonical indvars added");
 STATISTIC(NumReplaced, "Number of exit values replaced");
 STATISTIC(NumLFTR    , "Number of loop exit tests replaced");
 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
 STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
 STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
 // DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis
 // and referenced here.
@@ -117,9 +120,6 @@ namespace {
                              PHINode *IVPhi);
    void RewriteNonIntegerIVs(Loop *L);
    bool canExpandBackedgeTakenCount(Loop *L,
                                     const SCEV *BackedgeTakenCount);
    ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
                                        PHINode *IndVar,
                                        SCEVExpander &Rewriter);
@@ -200,9 +200,8 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
 /// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
 /// count expression can be safely and cheaply expanded into an instruction
 /// sequence that can be used by LinearFunctionTestReplace.
-bool IndVarSimplify::
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
-canExpandBackedgeTakenCount(Loop *L,
+  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
                            const SCEV *BackedgeTakenCount) {
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
      BackedgeTakenCount->isZero())
    return false;
@@ -235,6 +234,36 @@ canExpandBackedgeTakenCount(Loop *L,
  return true;
 }
 /// getBackedgeIVType - Get the widest type used by the loop test after peeking
 /// through Truncs.
 ///
 /// TODO: Unnecessary once LinearFunctionTestReplace is removed.
 static const Type *getBackedgeIVType(Loop *L) {
  if (!L->getExitingBlock())
    return 0;
  // Can't rewrite non-branch yet.
  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
  if (!BI)
    return 0;
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return 0;
  const Type *Ty = 0;
  for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
      OI != OE; ++OI) {
    assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
    TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
    if (!Trunc)
      continue;
    return Trunc->getSrcTy();
  }
  return Ty;
 }
 /// LinearFunctionTestReplace - This method rewrites the exit condition of the
 /// loop to be a canonical != comparison against the incremented loop induction
 /// variable.  This pass is able to rewrite the exit tests of any loop where the
@@ -245,7 +274,7 @@ LinearFunctionTestReplace(Loop *L,
                          const SCEV *BackedgeTakenCount,
                          PHINode *IndVar,
                          SCEVExpander &Rewriter) {
-  assert(canExpandBackedgeTakenCount(L, BackedgeTakenCount) && "precondition");
+  assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
  // If the exiting block is not the same as the backedge block, we must compare
@@ -536,12 +565,12 @@ public:
  bool CreateWideIV(SCEVExpander &Rewriter);
 protected:
  const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
  Instruction *CloneIVUser(Instruction *NarrowUse,
                           Instruction *NarrowDef,
                           Instruction *WideDef);
  const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
  Instruction *WidenIVUse(Instruction *NarrowUse,
                          Instruction *NarrowDef,
                          Instruction *WideDef);
@@ -594,24 +623,10 @@ void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
  }
 }
-// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
+static Value *getExtend( Value *NarrowOper, const Type *WideType,
-// perspective after widening it's type? In other words, can the extend be
+                               bool IsSigned, IRBuilder<> &Builder) {
-// safely hoisted out of the loop with SCEV reducing the value to a recurrence
+  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
-// on the same loop. If so, return the sign or zero extended
+                    Builder.CreateZExt(NarrowOper, WideType);
 // recurrence. Otherwise return NULL.
 const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
  if (!SE->isSCEVable(NarrowUse->getType()))
    return 0;
  const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
  const SCEV *WideExpr = IsSigned ?
    SE->getSignExtendExpr(NarrowExpr, WideType) :
    SE->getZeroExtendExpr(NarrowExpr, WideType);
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return 0;
  return AddRec;
 }
 /// CloneIVUser - Instantiate a wide operation to replace a narrow
@@ -636,36 +651,51 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
  case Instruction::AShr:
    DEBUG(dbgs() << "Cloning IVUser: " << *NarrowUse << "\n");
    IRBuilder<> Builder(NarrowUse);
    // Replace NarrowDef operands with WideDef. Otherwise, we don't know
    // anything about the narrow operand yet so must insert a [sz]ext. It is
    // probably loop invariant and will be folded or hoisted. If it actually
    // comes from a widened IV, it should be removed during a future call to
    // WidenIVUse.
    Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef :
      getExtend(NarrowUse->getOperand(0), WideType, IsSigned, Builder);
    Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef :
      getExtend(NarrowUse->getOperand(1), WideType, IsSigned, Builder);
    BinaryOperator *NarrowBO = cast<BinaryOperator>(NarrowUse);
    BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
-                                                    NarrowBO->getOperand(0),
+                                                    LHS, RHS,
                                                    NarrowBO->getOperand(1),
                                                    NarrowBO->getName());
    IRBuilder<> Builder(NarrowUse);
    Builder.Insert(WideBO);
    if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
    if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
-    for (unsigned i = 0; i < NarrowBO->getNumOperands(); ++i) {
+    return WideBO;
      Value *NarrowOper = NarrowBO->getOperand(i);
      if (NarrowOper == NarrowDef) {
        WideBO->setOperand(i, WideDef);
        continue;
      }
      // We don't know anything about the other operand here so must insert a
      // [sz]ext. It is probably loop invariant and will be folded or
      // hoisted. If it actually comes from a widened IV, it should be removed
      // during a future call to WidenIVUse.
      IRBuilder<> Builder(NarrowUse);
      Value *Extend = IsSigned ?
        Builder.CreateSExt(NarrowOper, WideType) :
        Builder.CreateZExt(NarrowOper, WideType);
      WideBO->setOperand(i, Extend);
    }
  }
  llvm_unreachable(0);
 }
 // GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
 // perspective after widening it's type? In other words, can the extend be
 // safely hoisted out of the loop with SCEV reducing the value to a recurrence
 // on the same loop. If so, return the sign or zero extended
 // recurrence. Otherwise return NULL.
 const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
  if (!SE->isSCEVable(NarrowUse->getType()))
    return 0;
  const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
  const SCEV *WideExpr = IsSigned ?
    SE->getSignExtendExpr(NarrowExpr, WideType) :
    SE->getZeroExtendExpr(NarrowExpr, WideType);
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return 0;
  return AddRec;
 }
 /// WidenIVUse - Determine whether an individual user of the narrow IV can be
 /// widened. If so, return the wide clone of the user.
 Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
@@ -682,9 +712,32 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
  // Our raison d'etre! Eliminate sign and zero extension.
  if (IsSigned ? isa<SExtInst>(NarrowUse) : isa<ZExtInst>(NarrowUse)) {
-    NarrowUse->replaceAllUsesWith(WideDef);
+    Value *NewDef = WideDef;
-    DeadInsts.push_back(NarrowUse);
+    if (NarrowUse->getType() != WideType) {
-
+      unsigned CastWidth = SE->getTypeSizeInBits(NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(NarrowUse);
        NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
              << " not wide enough to subsume " << *NarrowUse << "\n");
        NarrowUse->replaceUsesOfWith(NarrowDef, WideDef);
        NewDef = NarrowUse;
      }
    }
    if (NewDef != NarrowUse) {
      DEBUG(dbgs() << "INDVARS: eliminating " << *NarrowUse
            << " replaced by " << *WideDef << "\n");
      ++NumElimExt;
      NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.push_back(NarrowUse);
    }
    // Now that the extend is gone, expose it's uses to IVUsers for potential
    // further simplification within SimplifyIVUsers.
    IU->AddUsersIfInteresting(WideDef, WidePhi);
@@ -698,7 +751,7 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    IRBuilder<> Builder(NarrowUse);
-    Value *Trunc = Builder.CreateTrunc(WideDef, NarrowUse->getType());
+    Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType());
    NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
    return 0;
  }
@@ -834,6 +887,7 @@ void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
    return;
  DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
  ++NumElimCmp;
  Changed = true;
  DeadInsts.push_back(ICmp);
 }
@@ -888,6 +942,7 @@ void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
    IU->AddUsersIfInteresting(I, IVPhi);
  DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
  ++NumElimRem;
  Changed = true;
  DeadInsts.push_back(Rem);
 }
@@ -940,13 +995,20 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
  // a canonical induction variable should be inserted.
  const Type *LargestType = 0;
  bool NeedCannIV = false;
-  bool ExpandBECount = canExpandBackedgeTakenCount(L, BackedgeTakenCount);
+  bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
  if (ExpandBECount) {
    // If we have a known trip count and a single exit block, we'll be
    // rewriting the loop exit test condition below, which requires a
    // canonical induction variable.
    NeedCannIV = true;
    const Type *Ty = BackedgeTakenCount->getType();
    if (DisableIVRewrite) {
      // In this mode, SimplifyIVUsers may have already widened the IV used by
      // the backedge test and inserted a Trunc on the compare's operand. Get
      // the wider type to avoid creating a redundant narrow IV only used by the
      // loop test.
      LargestType = getBackedgeIVType(L);
    }
    if (!LargestType ||
        SE->getTypeSizeInBits(Ty) >
        SE->getTypeSizeInBits(LargestType))
@@ -1002,7 +1064,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
  // using it.  We can currently only handle loops with a single exit.
  ICmpInst *NewICmp = 0;
  if (ExpandBECount) {
-    assert(canExpandBackedgeTakenCount(L, BackedgeTakenCount) &&
+    assert(canExpandBackedgeTakenCount(L, SE) &&
           "canonical IV disrupted BackedgeTaken expansion");
    assert(NeedCannIV &&
           "LinearFunctionTestReplace requires a canonical induction variable");
--- a/test/Transforms/IndVarSimplify/elim-extend.ll
+++ b/test/Transforms/IndVarSimplify/elim-extend.ll
@@ -0,0 +1,93 @@
 ; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 ; Tests sign extend elimination in the inner and outer loop.
 ; %outercount is straightforward to widen, besides being in an outer loop.
 ; %innercount is currently blocked by lcssa, so is not widened.
 ; %inneriv can be widened only after proving it has no signed-overflow
 ;   based on the loop test.
 define void @nestedIV(i8* %address, i32 %limit) nounwind {
 entry:
  %limitdec = add i32 %limit, -1
  br label %outerloop
 ; CHECK: outerloop:
 ;
 ; Eliminate %ofs1 after widening outercount.
 ; CHECK-NOT: sext
 ; CHECK: getelementptr
 ;
 ; IV rewriting hoists a gep into this block. We don't like that.
 ; CHECK-NOT: getelementptr
 outerloop:
  %outercount   = phi i32 [ %outerpostcount, %outermerge ], [ 0, %entry ]
  %innercount = phi i32 [ %innercount.merge, %outermerge ], [ 0, %entry ]
  %outercountdec = add i32 %outercount, -1
  %ofs1 = sext i32 %outercountdec to i64
  %adr1 = getelementptr i8* %address, i64 %ofs1
  store i8 0, i8* %adr1
  br label %innerpreheader
 innerpreheader:
  %innerprecmp = icmp sgt i32 %limitdec, %innercount
  br i1 %innerprecmp, label %innerloop, label %outermerge
 ; CHECK: innerloop:
 ;
 ; Eliminate %ofs2 after widening inneriv.
 ; CHECK-NOT: sext
 ; CHECK: getelementptr
 ;
 ; FIXME: We should not increase the number of IVs in this loop.
 ; sext elimination plus LFTR results in 3 final IVs.
 ;
 ; FIXME: eliminate %ofs3 based the loop pre/post conditions
 ; even though innerpostiv is not NSW, thus sign extending innerpostiv
 ; does not yield the same expression as incrementing the widened inneriv.
 innerloop:
  %inneriv = phi i32 [ %innerpostiv, %innerloop ], [ %innercount, %innerpreheader ]
  %innerpostiv = add i32 %inneriv, 1
  %ofs2 = sext i32 %inneriv to i64
  %adr2 = getelementptr i8* %address, i64 %ofs2
  store i8 0, i8* %adr2
  %ofs3 = sext i32 %innerpostiv to i64
  %adr3 = getelementptr i8* %address, i64 %ofs3
  store i8 0, i8* %adr3
  %innercmp = icmp sgt i32 %limitdec, %innerpostiv
  br i1 %innercmp, label %innerloop, label %innerexit
 innerexit:
  %innercount.lcssa = phi i32 [ %innerpostiv, %innerloop ]
  br label %outermerge
 ; CHECK: outermerge:
 ;
 ; Eliminate %ofs4 after widening outercount
 ; CHECK-NOT: sext
 ; CHECK: getelementptr
 ;
 ; TODO: Eliminate %ofs5 after removing lcssa
 outermerge:
  %innercount.merge = phi i32 [ %innercount.lcssa, %innerexit ], [ %innercount, %innerpreheader ]
  %ofs4 = sext i32 %outercount to i64
  %adr4 = getelementptr i8* %address, i64 %ofs4
  store i8 0, i8* %adr3
  %ofs5 = sext i32 %innercount.merge to i64
  %adr5 = getelementptr i8* %address, i64 %ofs5
  store i8 0, i8* %adr4
  %outerpostcount = add i32 %outercount, 1
  %tmp47 = icmp slt i32 %outerpostcount, %limit
  br i1 %tmp47, label %outerloop, label %return
 return:
  ret void
 }
--- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -0,0 +1,123 @@
 ; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s
 ;
 ; Make sure that indvars isn't inserting canonical IVs.
 ; This is kinda hard to do until linear function test replacement is removed.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 define i32 @sum(i32* %arr, i32 %n) nounwind {
 entry:
  %precond = icmp slt i32 0, %n
  br i1 %precond, label %ph, label %return
 ph:
  br label %loop
 ; CHECK: loop:
 ;
 ; We should only have 2 IVs.
 ; CHECK: phi
 ; CHECK: phi
 ; CHECK-NOT: phi
 ;
 ; sext should be eliminated while preserving gep inboundsness.
 ; CHECK-NOT: sext
 ; CHECK: getelementptr inbounds
 loop:
  %i.02 = phi i32 [ 0, %ph ], [ %iinc, %loop ]
  %s.01 = phi i32 [ 0, %ph ], [ %sinc, %loop ]
  %ofs = sext i32 %i.02 to i64
  %adr = getelementptr inbounds i32* %arr, i64 %ofs
  %val = load i32* %adr
  %sinc = add nsw i32 %s.01, %val
  %iinc = add nsw i32 %i.02, 1
  %cond = icmp slt i32 %iinc, %n
  br i1 %cond, label %loop, label %exit
 exit:
  %s.lcssa = phi i32 [ %sinc, %loop ]
  br label %return
 return:
  %s.0.lcssa = phi i32 [ %s.lcssa, %exit ], [ 0, %entry ]
  ret i32 %s.0.lcssa
 }
 define i64 @suml(i32* %arr, i32 %n) nounwind {
 entry:
  %precond = icmp slt i32 0, %n
  br i1 %precond, label %ph, label %return
 ph:
  br label %loop
 ; CHECK: loop:
 ;
 ; We should only have 2 IVs.
 ; CHECK: phi
 ; CHECK: phi
 ; CHECK-NOT: phi
 ;
 ; %ofs sext should be eliminated while preserving gep inboundsness.
 ; CHECK-NOT: sext
 ; CHECK: getelementptr inbounds
 ; %vall sext should obviously not be eliminated
 ; CHECK: sext
 loop:
  %i.02 = phi i32 [ 0, %ph ], [ %iinc, %loop ]
  %s.01 = phi i64 [ 0, %ph ], [ %sinc, %loop ]
  %ofs = sext i32 %i.02 to i64
  %adr = getelementptr inbounds i32* %arr, i64 %ofs
  %val = load i32* %adr
  %vall = sext i32 %val to i64
  %sinc = add nsw i64 %s.01, %vall
  %iinc = add nsw i32 %i.02, 1
  %cond = icmp slt i32 %iinc, %n
  br i1 %cond, label %loop, label %exit
 exit:
  %s.lcssa = phi i64 [ %sinc, %loop ]
  br label %return
 return:
  %s.0.lcssa = phi i64 [ %s.lcssa, %exit ], [ 0, %entry ]
  ret i64 %s.0.lcssa
 }
 define void @outofbounds(i32* %first, i32* %last, i32 %idx) nounwind {
  %precond = icmp ne i32* %first, %last
  br i1 %precond, label %ph, label %return
 ; CHECK: ph:
 ; It's not indvars' job to perform LICM on %ofs
 ; CHECK-NOT: sext
 ph:
  br label %loop
 ; CHECK: loop:
 ;
 ; Preserve exactly one pointer type IV.
 ; CHECK: phi i32*
 ; CHECK-NOT: phi
 ;
 ; Don't create any extra adds.
 ; CHECK-NOT: add
 ;
 ; Preserve gep inboundsness, and don't factor it.
 ; CHECK: getelementptr inbounds i32* %ptriv, i32 1
 ; CHECK-NOT: add
 loop:
  %ptriv = phi i32* [ %first, %ph ], [ %ptrpost, %loop ]
  %ofs = sext i32 %idx to i64
  %adr = getelementptr inbounds i32* %ptriv, i64 %ofs
  store i32 3, i32* %adr
  %ptrpost = getelementptr inbounds i32* %ptriv, i32 1
  %cond = icmp ne i32* %ptrpost, %last
  br i1 %cond, label %loop, label %exit
 exit:
  br label %return
 return:
  ret void
 }