diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 0078abd4d52..d7b11b85460 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -79,6 +79,12 @@ static cl::opt DisableIVRewrite( "disable-iv-rewrite", cl::Hidden, cl::desc("Disable canonical induction variable rewriting")); +// Temporary flag for use with -disable-iv-rewrite to force a canonical IV for +// LFTR purposes. +static cl::opt ForceLFTR( + "force-lftr", cl::Hidden, + cl::desc("Enable forced linear function test replacement")); + namespace { class IndVarSimplify : public LoopPass { IVUsers *IU; @@ -140,9 +146,8 @@ namespace { void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter); - ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - PHINode *IndVar, - SCEVExpander &Rewriter); + Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, + PHINode *IndVar, SCEVExpander &Rewriter); void SinkUnusedInvariants(Loop *L); }; @@ -1014,7 +1019,7 @@ Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, NarrowUse->replaceUsesOfWith(NarrowDef, Trunc); return 0; } - // We assume that block terminators are not SCEVable. We wouldn't want to + // Assume block terminators cannot evaluate to a recurrence. We can't // insert a Trunc after a terminator if there happens to be a critical edge. assert(NarrowUse != NarrowUse->getParent()->getTerminator() && "SCEV is not expected to evaluate a block terminator"); @@ -1302,10 +1307,6 @@ static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { // Get the symbolic expression for this instruction. const SCEV *S = SE->getSCEV(I); - // We assume that terminators are not SCEVable. - assert((!S || I != I->getParent()->getTerminator()) && - "can't fold terminators"); - // Only consider affine recurrences. 
const SCEVAddRecExpr *AR = dyn_cast(S); if (AR && AR->getLoop() == L) @@ -1471,7 +1472,7 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, } } - if (!DisableIVRewrite) + if (!DisableIVRewrite || ForceLFTR) return false; // Recurse past add expressions, which commonly occur in the @@ -1522,7 +1523,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { /// getBackedgeIVType - Get the widest type used by the loop test after peeking /// through Truncs. /// -/// TODO: Unnecessary if LFTR does not force a canonical IV. +/// TODO: Unnecessary when ForceLFTR is removed. static Type *getBackedgeIVType(Loop *L) { if (!L->getExitingBlock()) return 0; @@ -1549,12 +1550,198 @@ static Type *getBackedgeIVType(Loop *L) { return Ty; } +/// isLoopInvariant - Perform a quick domtree based check for loop invariance +/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems +/// gratuitous for this purpose. +static bool isLoopInvariant(Value *V, Loop *L, DominatorTree *DT) { + Instruction *Inst = dyn_cast(V); + if (!Inst) + return true; + + return DT->properlyDominates(Inst->getParent(), L->getHeader()); +} + +/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop +/// invariant value to the phi. +static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { + Instruction *IncI = dyn_cast(IncV); + if (!IncI) + return 0; + + switch (IncI->getOpcode()) { + case Instruction::Add: + case Instruction::Sub: + break; + case Instruction::GetElementPtr: + // An IV counter must preserve its type. + if (IncI->getNumOperands() == 2) + break; + default: + return 0; + } + + PHINode *Phi = dyn_cast(IncI->getOperand(0)); + if (Phi && Phi->getParent() == L->getHeader()) { + if (isLoopInvariant(IncI->getOperand(1), L, DT)) + return Phi; + return 0; + } + if (IncI->getOpcode() == Instruction::GetElementPtr) + return 0; + + // Allow add/sub to be commuted. 
+ Phi = dyn_cast(IncI->getOperand(1)); + if (Phi && Phi->getParent() == L->getHeader()) { + if (isLoopInvariant(IncI->getOperand(0), L, DT)) + return Phi; + } + return 0; +} + +/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show +/// that the current exit test is already sufficiently canonical. +static bool needsLFTR(Loop *L, DominatorTree *DT) { + assert(L->getExitingBlock() && "expected loop exit"); + + BasicBlock *LatchBlock = L->getLoopLatch(); + // Don't bother with LFTR if the loop is not properly simplified. + if (!LatchBlock) + return false; + + BranchInst *BI = dyn_cast(L->getExitingBlock()->getTerminator()); + assert(BI && "expected exit branch"); + + // Do LFTR to simplify the exit condition to an ICMP. + ICmpInst *Cond = dyn_cast(BI->getCondition()); + if (!Cond) + return true; + + // Do LFTR to simplify the exit ICMP to EQ/NE + ICmpInst::Predicate Pred = Cond->getPredicate(); + if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) + return true; + + // Look for a loop invariant RHS + Value *LHS = Cond->getOperand(0); + Value *RHS = Cond->getOperand(1); + if (!isLoopInvariant(RHS, L, DT)) { + if (!isLoopInvariant(LHS, L, DT)) + return true; + std::swap(LHS, RHS); + } + // Look for a simple IV counter LHS + PHINode *Phi = dyn_cast(LHS); + if (!Phi) + Phi = getLoopPhiForCounter(LHS, L, DT); + + if (!Phi) + return true; + + // Do LFTR if the exit condition's IV is *not* a simple counter. + Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch()); + return Phi != getLoopPhiForCounter(IncV, L, DT); +} + +/// AlmostDeadIV - Return true if this IV has no uses other than the (soon to +/// be rewritten) loop exit test. 
+static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { + int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); + Value *IncV = Phi->getIncomingValue(LatchIdx); + + for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end(); + UI != UE; ++UI) { + if (*UI != Cond && *UI != IncV) return false; + } + + for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end(); + UI != UE; ++UI) { + if (*UI != Cond && *UI != Phi) return false; + } + return true; +} + +/// FindLoopCounter - Find an affine IV in canonical form. +/// +/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount +/// +/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride. +/// This is difficult in general for SCEV because of potential overflow. But we +/// could at least handle constant BECounts. +static PHINode * +FindLoopCounter(Loop *L, const SCEV *BECount, + ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) { + // I'm not sure how BECount could be a pointer type, but we definitely don't + // want to LFTR that. + if (BECount->getType()->isPointerTy()) + return 0; + + uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); + + Value *Cond = + cast(L->getExitingBlock()->getTerminator())->getCondition(); + + // Loop over all of the PHI nodes, looking for a simple counter. + PHINode *BestPhi = 0; + const SCEV *BestInit = 0; + BasicBlock *LatchBlock = L->getLoopLatch(); + assert(LatchBlock && "needsLFTR should guarantee a loop latch"); + + for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { + PHINode *Phi = cast(I); + if (!SE->isSCEVable(Phi->getType())) + continue; + + const SCEVAddRecExpr *AR = dyn_cast(SE->getSCEV(Phi)); + if (!AR || AR->getLoop() != L || !AR->isAffine()) + continue; + + // AR may be a pointer type, while BECount is an integer type. + // AR may be wider than BECount. With eq/ne tests overflow is immaterial. + // AR may not be a narrower type, or we may never exit. 
+ uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); + if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth))) + continue; + + const SCEV *Step = dyn_cast(AR->getStepRecurrence(*SE)); + if (!Step || !Step->isOne()) + continue; + + int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); + Value *IncV = Phi->getIncomingValue(LatchIdx); + if (getLoopPhiForCounter(IncV, L, DT) != Phi) + continue; + + const SCEV *Init = AR->getStart(); + + if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { + // Don't force a live loop counter if another IV can be used. + if (AlmostDeadIV(Phi, LatchBlock, Cond)) + continue; + + // Prefer to count-from-zero. This is a more "canonical" counter form. It + // also prefers integer to pointer IVs. + if (BestInit->isZero() != Init->isZero()) { + if (BestInit->isZero()) + continue; + } + // If two IVs both count from zero or both count from nonzero then the + // narrower is likely a dead phi that has been widened. Use the wider phi + // to allow the other to be eliminated. + if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) + continue; + } + BestPhi = Phi; + BestInit = Init; + } + return BestPhi; +} + /// LinearFunctionTestReplace - This method rewrites the exit condition of the /// loop to be a canonical != comparison against the incremented loop induction /// variable. This pass is able to rewrite the exit tests of any loop where the /// SCEV analysis can determine a loop-invariant trip count of the loop, which /// is actually a much broader range than just linear tests. -ICmpInst *IndVarSimplify:: +Value *IndVarSimplify:: LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, @@ -1562,62 +1749,118 @@ LinearFunctionTestReplace(Loop *L, assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); BranchInst *BI = cast(L->getExitingBlock()->getTerminator()); + // In DisableIVRewrite mode, IndVar is not necessarily a canonical IV. 
In this + // mode, LFTR can ignore IV overflow and truncate to the width of + // BECount. This avoids materializing the add(zext(add)) expression. + Type *CntTy = DisableIVRewrite ? + BackedgeTakenCount->getType() : IndVar->getType(); + + const SCEV *IVLimit = BackedgeTakenCount; + // If the exiting block is not the same as the backedge block, we must compare // against the preincremented value, otherwise we prefer to compare against // the post-incremented value. Value *CmpIndVar; - const SCEV *RHS = BackedgeTakenCount; if (L->getExitingBlock() == L->getLoopLatch()) { // Add one to the "backedge-taken" count to get the trip count. // If this addition may overflow, we have to be more pessimistic and // cast the induction variable before doing the add. - const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0); const SCEV *N = - SE->getAddExpr(BackedgeTakenCount, - SE->getConstant(BackedgeTakenCount->getType(), 1)); - if ((isa(N) && !N->isZero()) || - SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { - // No overflow. Cast the sum. - RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType()); - } else { - // Potential overflow. Cast before doing the add. - RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, - IndVar->getType()); - RHS = SE->getAddExpr(RHS, - SE->getConstant(IndVar->getType(), 1)); + SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1)); + if (CntTy == IVLimit->getType()) + IVLimit = N; + else { + const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0); + if ((isa(N) && !N->isZero()) || + SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { + // No overflow. Cast the sum. + IVLimit = SE->getTruncateOrZeroExtend(N, CntTy); + } else { + // Potential overflow. Cast before doing the add. 
+ IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy); + IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1)); + } } - // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); } else { // We have to use the preincremented value... - RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, - IndVar->getType()); + IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy); CmpIndVar = IndVar; } + // For unit stride, IVLimit = Start + BECount with 2's complement overflow. + // So for non-zero start, compute the IVLimit here. + bool isPtrIV = false; + Type *CmpTy = CntTy; + const SCEVAddRecExpr *AR = dyn_cast(SE->getSCEV(IndVar)); + assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter"); + if (!AR->getStart()->isZero()) { + assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); + const SCEV *IVInit = AR->getStart(); + + // For pointer types, sign extend BECount in order to materialize a GEP. + // Note that for DisableIVRewrite, we never run SCEVExpander on a + // pointer type, because we must preserve the existing GEPs. Instead we + // directly generate a GEP later. + if (IVInit->getType()->isPointerTy()) { + isPtrIV = true; + CmpTy = SE->getEffectiveSCEVType(IVInit->getType()); + IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy); + } + // For integer types, truncate the IV before computing IVInit + BECount. + else { + if (SE->getTypeSizeInBits(IVInit->getType()) + > SE->getTypeSizeInBits(CmpTy)) + IVInit = SE->getTruncateExpr(IVInit, CmpTy); + + IVLimit = SE->getAddExpr(IVInit, IVLimit); + } + } // Expand the code for the iteration count. 
- assert(SE->isLoopInvariant(RHS, L) && + IRBuilder<> Builder(BI); + + assert(SE->isLoopInvariant(IVLimit, L) && "Computed iteration count is not loop invariant!"); - Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI); + Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI); + + // Create a gep for IVInit + IVLimit from an existing pointer base. + assert(isPtrIV == IndVar->getType()->isPointerTy() && + "IndVar type must match IVInit type"); + if (isPtrIV) { + Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader()); + assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter"); + const PointerType *PointerTy = cast(IVStart->getType()); + assert(SE->getSizeOfExpr(PointerTy->getElementType())->isOne() && + "unit stride pointer IV must be i8*"); + + Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); + ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit"); + Builder.SetInsertPoint(BI); + } // Insert a new icmp_ne or icmp_eq instruction before the branch. - ICmpInst::Predicate Opcode; + ICmpInst::Predicate P; if (L->contains(BI->getSuccessor(0))) - Opcode = ICmpInst::ICMP_NE; + P = ICmpInst::ICMP_NE; else - Opcode = ICmpInst::ICMP_EQ; + P = ICmpInst::ICMP_EQ; DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" << " LHS:" << *CmpIndVar << '\n' << " op:\t" - << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" - << " RHS:\t" << *RHS << "\n"); + << (P == ICmpInst::ICMP_NE ? 
"!=" : "==") << "\n" + << " RHS:\t" << *ExitCnt << "\n" + << " Expr:\t" << *IVLimit << "\n"); - ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); - Cond->setDebugLoc(BI->getDebugLoc()); + if (SE->getTypeSizeInBits(CmpIndVar->getType()) + > SE->getTypeSizeInBits(CmpTy)) { + CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv"); + } + + Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); Value *OrigCond = BI->getCondition(); // It's tempting to use replaceAllUsesWith here to fully replace the old // comparison, but that's not immediately safe, since users of the old @@ -1784,8 +2027,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // a canonical induction variable should be inserted. Type *LargestType = 0; bool NeedCannIV = false; + bool ReuseIVForExit = DisableIVRewrite && !ForceLFTR; bool ExpandBECount = canExpandBackedgeTakenCount(L, SE); - if (ExpandBECount) { + if (ExpandBECount && !ReuseIVForExit) { // If we have a known trip count and a single exit block, we'll be // rewriting the loop exit test condition below, which requires a // canonical induction variable. @@ -1848,15 +2092,13 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI()); } } - + else if (ExpandBECount && ReuseIVForExit && needsLFTR(L, DT)) { + IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD); + } // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. - ICmpInst *NewICmp = 0; - if (ExpandBECount) { - assert(canExpandBackedgeTakenCount(L, SE) && - "canonical IV disrupted BackedgeTaken expansion"); - assert(NeedCannIV && - "LinearFunctionTestReplace requires a canonical induction variable"); + Value *NewICmp = 0; + if (ExpandBECount && IndVar) { // Check preconditions for proper SCEVExpander operation. 
SCEV does not // express SCEVExpander's dependencies, such as LoopSimplify. Instead any // pass that uses the SCEVExpander must do it. This does not work well for @@ -1894,9 +2136,11 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // For completeness, inform IVUsers of the IV use in the newly-created // loop exit test instruction. - if (NewICmp && IU) - IU->AddUsersIfInteresting(cast(NewICmp->getOperand(0))); - + if (IU && NewICmp) { + ICmpInst *NewICmpInst = dyn_cast(NewICmp); + if (NewICmpInst) + IU->AddUsersIfInteresting(cast(NewICmpInst->getOperand(0))); + } // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader()); // Check a post-condition. diff --git a/test/Transforms/IndVarSimplify/ada-loops.ll b/test/Transforms/IndVarSimplify/ada-loops.ll index 9e635fdc006..da7ecb66c54 100644 --- a/test/Transforms/IndVarSimplify/ada-loops.ll +++ b/test/Transforms/IndVarSimplify/ada-loops.ll @@ -9,10 +9,9 @@ ; Note that all four functions should actually be converted to ; memset. However, this test case validates indvars behavior. We ; don't check that phis are "folded together" because that is a job -; for loop strength reduction. But indvars must remove sext, zext, -; trunc, and add i8. +; for loop strength reduction. But indvars must remove sext, zext, and add i8. ; -; CHECK-NOT: {{sext|zext|trunc|add i8}} +; CHECK-NOT: {{sext|zext|add i8}} ; ModuleID = 'ada.bc' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-n:8:16:32" diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll new file mode 100644 index 00000000000..6ccd1a424c7 --- /dev/null +++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll @@ -0,0 +1,230 @@ +; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s +; +; Make sure that indvars can perform LFTR without a canonical IV. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; Perform LFTR using the original pointer-type IV. + +; for(char* p = base; p < base + n; ++p) { +; *p = p-base; +; } +define void @ptriv(i8* %base, i32 %n) nounwind { +entry: + %idx.ext = sext i32 %n to i64 + %add.ptr = getelementptr inbounds i8* %base, i64 %idx.ext + %cmp1 = icmp ult i8* %base, %add.ptr + br i1 %cmp1, label %for.body, label %for.end + +; CHECK: for.body: +; CHECK: phi i8* +; CHECK-NOT: phi +; CHECK-NOT: add +; CHECK: icmp ne i8* +; CHECK: br i1 +for.body: + %p.02 = phi i8* [ %base, %entry ], [ %incdec.ptr, %for.body ] + ; cruft to make the IV useful + %sub.ptr.lhs.cast = ptrtoint i8* %p.02 to i64 + %sub.ptr.rhs.cast = ptrtoint i8* %base to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + %conv = trunc i64 %sub.ptr.sub to i8 + store i8 %conv, i8* %p.02 + %incdec.ptr = getelementptr inbounds i8* %p.02, i32 1 + %cmp = icmp ult i8* %incdec.ptr, %add.ptr + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + +; It would be nice if SCEV and any loop analysis could assume that +; preheaders exist. Unfortunately it is not always the case. This test +; checks that SCEVExpander can handle an outer loop that has not yet +; been simplified. As a result, the inner loop's exit test will not be +; rewritten. 
+define void @expandOuterRecurrence(i32 %arg) nounwind { +entry: + %sub1 = sub nsw i32 %arg, 1 + %cmp1 = icmp slt i32 0, %sub1 + br i1 %cmp1, label %outer, label %exit + +outer: + %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ] + %sub2 = sub nsw i32 %arg, %i + %sub3 = sub nsw i32 %sub2, 1 + %cmp2 = icmp slt i32 0, %sub3 + br i1 %cmp2, label %inner.ph, label %outer.inc + +inner.ph: + br label %inner + +; CHECK: inner: +; CHECK: icmp slt +; CHECK: br i1 +inner: + %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ] + %j.inc = add nsw i32 %j, 1 + %cmp3 = icmp slt i32 %j.inc, %sub3 + br i1 %cmp3, label %inner, label %outer.inc + +; CHECK: outer.inc: +; CHECK: icmp ne +; CHECK: br i1 +outer.inc: + %i.inc = add nsw i32 %i, 1 + %cmp4 = icmp slt i32 %i.inc, %sub1 + br i1 %cmp4, label %outer, label %exit + +exit: + ret void +} + +; Force SCEVExpander to look for an existing well-formed phi. +; Perform LFTR without generating extra preheader code. +define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector, + i32 %irow, i32 %ilead) nounwind { +; CHECK: entry: +; CHECK-NOT: zext +; CHECK-NOT: add +; CHECK: loop: +; CHECK: phi i64 +; CHECK: phi i64 +; CHECK-NOT: phi +; CHECK: icmp ne +; CHECK: br i1 +entry: + %cmp = icmp slt i32 1, %irow + br i1 %cmp, label %loop, label %return + +loop: + %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ] + %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ] + %diagidx = add nsw i32 %rowidx, %i + %diagidxw = sext i32 %diagidx to i64 + %matrixp = getelementptr inbounds [0 x double]* %matrix, i32 0, i64 %diagidxw + %v1 = load double* %matrixp + %iw = sext i32 %i to i64 + %vectorp = getelementptr inbounds [0 x double]* %vector, i32 0, i64 %iw + %v2 = load double* %vectorp + %row.inc = add nsw i32 %rowidx, %ilead + %i.inc = add nsw i32 %i, 1 + %cmp196 = icmp slt i32 %i.inc, %irow + br i1 %cmp196, label %loop, label %return + +return: + ret void +} + +; Avoid generating extra code to materialize a trip count. Skip LFTR. 
+define void @unguardedloop([0 x double]* %matrix, [0 x double]* %vector, + i32 %irow, i32 %ilead) nounwind { +entry: + br label %loop + +; CHECK: entry: +; CHECK-NOT: zext +; CHECK-NOT: add +; CHECK: loop: +; CHECK: phi i64 +; CHECK: phi i64 +; CHECK-NOT: phi +; CHECK: icmp slt +; CHECK: br i1 +loop: + %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ] + %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ] + %diagidx = add nsw i32 %rowidx, %i + %diagidxw = sext i32 %diagidx to i64 + %matrixp = getelementptr inbounds [0 x double]* %matrix, i32 0, i64 %diagidxw + %v1 = load double* %matrixp + %iw = sext i32 %i to i64 + %vectorp = getelementptr inbounds [0 x double]* %vector, i32 0, i64 %iw + %v2 = load double* %vectorp + %row.inc = add nsw i32 %rowidx, %ilead + %i.inc = add nsw i32 %i, 1 + %cmp196 = icmp slt i32 %i.inc, %irow + br i1 %cmp196, label %loop, label %return + +return: + ret void +} + +; Remove %i which is only used by the exit test. +; Verify that SCEV can still compute a backedge count from the sign +; extended %n, used for pointer comparison by LFTR. +define void @geplftr(i8* %base, i32 %x, i32 %y, i32 %n) nounwind { +entry: + %x.ext = sext i32 %x to i64 + %add.ptr = getelementptr inbounds i8* %base, i64 %x.ext + %y.ext = sext i32 %y to i64 + %add.ptr10 = getelementptr inbounds i8* %add.ptr, i64 %y.ext + %lim = add i32 %x, %n + %cmp.ph = icmp ult i32 %x, %lim + br i1 %cmp.ph, label %loop, label %exit + +; CHECK: loop: +; CHECK: phi i8* +; CHECK-NOT: phi +; CHECK: getelementptr +; CHECK: store +; CHECK: icmp ne i8* +; CHECK: br i1 +loop: + %i = phi i32 [ %x, %entry ], [ %inc, %loop ] + %aptr = phi i8* [ %add.ptr10, %entry ], [ %incdec.ptr, %loop ] + %incdec.ptr = getelementptr inbounds i8* %aptr, i32 1 + store i8 3, i8* %aptr + %inc = add i32 %i, 1 + %cmp = icmp ult i32 %inc, %lim + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + +; Exercise backedge taken count verification with a never-taken loop. 
+define void @nevertaken() nounwind uwtable ssp { +entry: + br label %loop + +; CHECK: loop: +; CHECK-NOT: phi +; CHECK-NOT: add +; CHECK-NOT: icmp +; CHECK: exit: +loop: + %i = phi i32 [ 0, %entry ], [ %inc, %loop ] + %inc = add nsw i32 %i, 1 + %cmp = icmp sle i32 %inc, 0 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + +; Test LFTR on an IV whose recurrence start is a non-unit pointer type. +define void @aryptriv([256 x i8]* %base, i32 %n) nounwind { +entry: + %ivstart = getelementptr inbounds [256 x i8]* %base, i32 0, i32 0 + %ivend = getelementptr inbounds [256 x i8]* %base, i32 0, i32 %n + %cmp.ph = icmp ult i8* %ivstart, %ivend + br i1 %cmp.ph, label %loop, label %exit + +; CHECK: loop: +; CHECK: phi i8* +; CHECK-NOT: phi +; CHECK: getelementptr +; CHECK: store +; CHECK: icmp ne i8* +; CHECK: br i1 +loop: + %aptr = phi i8* [ %ivstart, %entry ], [ %incdec.ptr, %loop ] + %incdec.ptr = getelementptr inbounds i8* %aptr, i32 1 + store i8 3, i8* %aptr + %cmp = icmp ult i8* %incdec.ptr, %ivend + br i1 %cmp, label %loop, label %exit + +exit: + ret void +}