diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index cc9f9ccd553..4286ec336be 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -334,6 +334,12 @@ namespace llvm {
     /// found.
     BasicBlock* getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
 
+    /// isNecessaryCond - Test whether the given CondValue value is a condition
+    /// which is at least as strict as the one described by Pred, LHS, and RHS.
+    bool isNecessaryCond(Value *Cond, ICmpInst::Predicate Pred,
+                         const SCEV *LHS, const SCEV *RHS,
+                         bool Inverse);
+
     /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
     /// in the header of its containing loop, we know the loop executes a
     /// constant number of times, and the PHI node is just a recurrence
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 436b79dc07e..d1f6679a437 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -2813,7 +2813,6 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
   const SCEV* BECount = CouldNotCompute;
   const SCEV* MaxBECount = CouldNotCompute;
   bool CouldNotComputeBECount = false;
-  bool CouldNotComputeMaxBECount = false;
   for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
     BackedgeTakenInfo NewBTI =
       ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
@@ -2826,25 +2825,13 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
     } else if (!CouldNotComputeBECount) {
       if (BECount == CouldNotCompute)
         BECount = NewBTI.Exact;
-      else {
-        // TODO: More analysis could be done here. For example, a
-        // loop with a short-circuiting && operator has an exact count
-        // of the min of both sides.
-        CouldNotComputeBECount = true;
-        BECount = CouldNotCompute;
-      }
-    }
-    if (NewBTI.Max == CouldNotCompute) {
-      // We couldn't compute an maximum value for this exit, so
-      // we won't be able to compute an maximum value for the loop.
-      CouldNotComputeMaxBECount = true;
-      MaxBECount = CouldNotCompute;
-    } else if (!CouldNotComputeMaxBECount) {
-      if (MaxBECount == CouldNotCompute)
-        MaxBECount = NewBTI.Max;
       else
-        MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, NewBTI.Max);
+        BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
     }
+    if (MaxBECount == CouldNotCompute)
+      MaxBECount = NewBTI.Max;
+    else if (NewBTI.Max != CouldNotCompute)
+      MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
   }
 
   return BackedgeTakenInfo(BECount, MaxBECount);
@@ -2925,9 +2912,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
                                                        Value *ExitCond,
                                                        BasicBlock *TBB,
                                                        BasicBlock *FBB) {
-  // Check if the controlling expression for this loop is an and or or. In
-  // such cases, an exact backedge-taken count may be infeasible, but a
-  // maximum count may still be feasible.
+  // Check if the controlling expression for this loop is an And or Or.
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
     if (BO->getOpcode() == Instruction::And) {
       // Recurse on the operands of the and.
@@ -3899,90 +3884,113 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
         LoopEntryPredicate->isUnconditional())
       continue;
 
-    ICmpInst *ICI = dyn_cast<ICmpInst>(LoopEntryPredicate->getCondition());
-    if (!ICI) continue;
-
-    // Now that we found a conditional branch that dominates the loop, check to
-    // see if it is the comparison we are looking for.
-    Value *PreCondLHS = ICI->getOperand(0);
-    Value *PreCondRHS = ICI->getOperand(1);
-    ICmpInst::Predicate Cond;
-    if (LoopEntryPredicate->getSuccessor(0) == PredecessorDest)
-      Cond = ICI->getPredicate();
-    else
-      Cond = ICI->getInversePredicate();
-
-    if (Cond == Pred)
-      ; // An exact match.
-    else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
-      ; // The actual condition is beyond sufficient.
-    else
-      // Check a few special cases.
-      switch (Cond) {
-      case ICmpInst::ICMP_UGT:
-        if (Pred == ICmpInst::ICMP_ULT) {
-          std::swap(PreCondLHS, PreCondRHS);
-          Cond = ICmpInst::ICMP_ULT;
-          break;
-        }
-        continue;
-      case ICmpInst::ICMP_SGT:
-        if (Pred == ICmpInst::ICMP_SLT) {
-          std::swap(PreCondLHS, PreCondRHS);
-          Cond = ICmpInst::ICMP_SLT;
-          break;
-        }
-        continue;
-      case ICmpInst::ICMP_NE:
-        // Expressions like (x >u 0) are often canonicalized to (x != 0),
-        // so check for this case by checking if the NE is comparing against
-        // a minimum or maximum constant.
-        if (!ICmpInst::isTrueWhenEqual(Pred))
-          if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
-            const APInt &A = CI->getValue();
-            switch (Pred) {
-            case ICmpInst::ICMP_SLT:
-              if (A.isMaxSignedValue()) break;
-              continue;
-            case ICmpInst::ICMP_SGT:
-              if (A.isMinSignedValue()) break;
-              continue;
-            case ICmpInst::ICMP_ULT:
-              if (A.isMaxValue()) break;
-              continue;
-            case ICmpInst::ICMP_UGT:
-              if (A.isMinValue()) break;
-              continue;
-            default:
-              continue;
-            }
-            Cond = ICmpInst::ICMP_NE;
-            // NE is symmetric but the original comparison may not be. Swap
-            // the operands if necessary so that they match below.
-            if (isa<SCEVConstant>(LHS))
-              std::swap(PreCondLHS, PreCondRHS);
-            break;
-          }
-        continue;
-      default:
-        // We weren't able to reconcile the condition.
-        continue;
-      }
-
-    if (!PreCondLHS->getType()->isInteger()) continue;
-
-    const SCEV* PreCondLHSSCEV = getSCEV(PreCondLHS);
-    const SCEV* PreCondRHSSCEV = getSCEV(PreCondRHS);
-    if ((HasSameValue(LHS, PreCondLHSSCEV) &&
-         HasSameValue(RHS, PreCondRHSSCEV)) ||
-        (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
-         HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV))))
+    if (isNecessaryCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS,
+                        LoopEntryPredicate->getSuccessor(0) != PredecessorDest))
       return true;
   }
 
   return false;
 }
 
+/// isNecessaryCond - Test whether the given CondValue value is a condition
+/// which is at least as strict as the one described by Pred, LHS, and RHS.
+bool ScalarEvolution::isNecessaryCond(Value *CondValue,
+                                      ICmpInst::Predicate Pred,
+                                      const SCEV *LHS, const SCEV *RHS,
+                                      bool Inverse) {
+  // Recursively handle And and Or conditions.
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) {
+    if (BO->getOpcode() == Instruction::And) {
+      if (!Inverse)
+        return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
+               isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+    } else if (BO->getOpcode() == Instruction::Or) {
+      if (Inverse)
+        return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
+               isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+    }
+  }
+
+  ICmpInst *ICI = dyn_cast<ICmpInst>(CondValue);
+  if (!ICI) return false;
+
+  // Now that we found a conditional branch that dominates the loop, check to
+  // see if it is the comparison we are looking for.
+  Value *PreCondLHS = ICI->getOperand(0);
+  Value *PreCondRHS = ICI->getOperand(1);
+  ICmpInst::Predicate Cond;
+  if (Inverse)
+    Cond = ICI->getInversePredicate();
+  else
+    Cond = ICI->getPredicate();
+
+  if (Cond == Pred)
+    ; // An exact match.
+  else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
+    ; // The actual condition is beyond sufficient.
+  else
+    // Check a few special cases.
+    switch (Cond) {
+    case ICmpInst::ICMP_UGT:
+      if (Pred == ICmpInst::ICMP_ULT) {
+        std::swap(PreCondLHS, PreCondRHS);
+        Cond = ICmpInst::ICMP_ULT;
+        break;
+      }
+      return false;
+    case ICmpInst::ICMP_SGT:
+      if (Pred == ICmpInst::ICMP_SLT) {
+        std::swap(PreCondLHS, PreCondRHS);
+        Cond = ICmpInst::ICMP_SLT;
+        break;
+      }
+      return false;
+    case ICmpInst::ICMP_NE:
+      // Expressions like (x >u 0) are often canonicalized to (x != 0),
+      // so check for this case by checking if the NE is comparing against
+      // a minimum or maximum constant.
+      if (!ICmpInst::isTrueWhenEqual(Pred))
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
+          const APInt &A = CI->getValue();
+          switch (Pred) {
+          case ICmpInst::ICMP_SLT:
+            if (A.isMaxSignedValue()) break;
+            return false;
+          case ICmpInst::ICMP_SGT:
+            if (A.isMinSignedValue()) break;
+            return false;
+          case ICmpInst::ICMP_ULT:
+            if (A.isMaxValue()) break;
+            return false;
+          case ICmpInst::ICMP_UGT:
+            if (A.isMinValue()) break;
+            return false;
+          default:
+            return false;
+          }
+          Cond = ICmpInst::ICMP_NE;
+          // NE is symmetric but the original comparison may not be. Swap
+          // the operands if necessary so that they match below.
+          if (isa<SCEVConstant>(LHS))
+            std::swap(PreCondLHS, PreCondRHS);
+          break;
+        }
+      return false;
+    default:
+      // We weren't able to reconcile the condition.
+      return false;
+    }
+
+  if (!PreCondLHS->getType()->isInteger()) return false;
+
+  const SCEV *PreCondLHSSCEV = getSCEV(PreCondLHS);
+  const SCEV *PreCondRHSSCEV = getSCEV(PreCondRHS);
+  return (HasSameValue(LHS, PreCondLHSSCEV) &&
+          HasSameValue(RHS, PreCondRHSSCEV)) ||
+         (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
+          HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV)));
+}
+
 /// getBECount - Subtract the end and start values and divide by the step,
 /// rounding up, to get the number of times the backedge is executed. Return
 /// CouldNotCompute if an intermediate computation overflows.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index c5591d70273..6d7abc02ebe 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -51,21 +51,26 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V,
   if (Argument *A = dyn_cast<Argument>(V)) {
     // Check to see if there is already a cast!
     for (Value::use_iterator UI = A->use_begin(), E = A->use_end();
-         UI != E; ++UI) {
+         UI != E; ++UI)
       if ((*UI)->getType() == Ty)
         if (CastInst *CI = dyn_cast<CastInst>(cast<Instruction>(*UI)))
           if (CI->getOpcode() == opcode) {
             // If the cast isn't the first instruction of the function, move it.
-            if (BasicBlock::iterator(CI) != 
+            if (BasicBlock::iterator(CI) !=
                 A->getParent()->getEntryBlock().begin()) {
-              // If the CastInst is the insert point, change the insert point.
-              if (CI == InsertPt) ++InsertPt;
-              // Splice the cast at the beginning of the entry block.
-              CI->moveBefore(A->getParent()->getEntryBlock().begin());
+              // Recreate the cast at the beginning of the entry block.
+              // The old cast is left in place in case it is being used
+              // as an insert point.
+              Instruction *NewCI =
+                CastInst::Create(opcode, V, Ty, "",
+                                 A->getParent()->getEntryBlock().begin());
+              NewCI->takeName(CI);
+              CI->replaceAllUsesWith(NewCI);
+              return NewCI;
             }
             return CI;
           }
-    }
+
     Instruction *I = CastInst::Create(opcode, V, Ty, V->getName(),
                                       A->getParent()->getEntryBlock().begin());
     InsertedValues.insert(I);
@@ -85,10 +90,13 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V,
             It = cast<InvokeInst>(I)->getNormalDest()->begin();
           while (isa<PHINode>(It)) ++It;
           if (It != BasicBlock::iterator(CI)) {
-            // If the CastInst is the insert point, change the insert point.
-            if (CI == InsertPt) ++InsertPt;
-            // Splice the cast immediately after the operand in question.
-            CI->moveBefore(It);
+            // Recreate the cast immediately after the operand in question.
+            // The old cast is left in place in case it is being used
+            // as an insert point.
+            Instruction *NewCI = CastInst::Create(opcode, V, Ty, "", It);
+            NewCI->takeName(CI);
+            CI->replaceAllUsesWith(NewCI);
+            return NewCI;
           }
           return CI;
         }
@@ -497,8 +505,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
       }
     }
 
-    Value *RestV = expand(Rest);
-    return expand(SE.getAddExpr(S->getStart(), SE.getUnknown(RestV)));
+    // Just do a normal add. Pre-expand the operands to suppress folding.
+    return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
+                                SE.getUnknown(expand(Rest))));
   }
 
   // {0,+,1} --> Insert a canonical induction variable into the loop!
@@ -546,36 +555,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
       getOrInsertCanonicalInductionVariable(L, Ty);
 
   // If this is a simple linear addrec, emit it now as a special case.
-  if (S->isAffine()) {   // {0,+,F} --> i*F
-    Value *F = expandCodeFor(S->getOperand(1), Ty);
-
-    // If the insert point is directly inside of the loop, emit the multiply at
-    // the insert point. Otherwise, L is a loop that is a parent of the insert
-    // point loop. If we can, move the multiply to the outer most loop that it
-    // is safe to be in.
-    BasicBlock::iterator MulInsertPt = getInsertionPoint();
-    Loop *InsertPtLoop = SE.LI->getLoopFor(MulInsertPt->getParent());
-    if (InsertPtLoop != L && InsertPtLoop &&
-        L->contains(InsertPtLoop->getHeader())) {
-      do {
-        // If we cannot hoist the multiply out of this loop, don't.
-        if (!InsertPtLoop->isLoopInvariant(F)) break;
-
-        BasicBlock *InsertPtLoopPH = InsertPtLoop->getLoopPreheader();
-
-        // If this loop hasn't got a preheader, we aren't able to hoist the
-        // multiply.
-        if (!InsertPtLoopPH)
-          break;
-
-        // Otherwise, move the insert point to the preheader.
-        MulInsertPt = InsertPtLoopPH->getTerminator();
-        InsertPtLoop = InsertPtLoop->getParentLoop();
-      } while (InsertPtLoop != L);
-    }
-
-    return InsertBinop(Instruction::Mul, I, F, MulInsertPt);
-  }
+  if (S->isAffine())   // {0,+,F} --> i*F
+    return
+      expand(SE.getTruncateOrNoop(
+        SE.getMulExpr(SE.getUnknown(I),
+                      SE.getNoopOrAnyExtend(S->getOperand(1),
+                                            I->getType())),
+        Ty));
 
   // If this is a chain of recurrences, turn it into a closed form, using the
   // folders, then expandCodeFor the closed form. This allows the folders to
@@ -671,8 +657,31 @@ Value *SCEVExpander::expand(const SCEV *S) {
     InsertedExpressions.find(S);
   if (I != InsertedExpressions.end())
     return I->second;
-  
+
+  // Compute an insertion point for this SCEV object. Hoist the instructions
+  // as far out in the loop nest as possible.
+  BasicBlock::iterator InsertPt = getInsertionPoint();
+  BasicBlock::iterator SaveInsertPt = InsertPt;
+  for (Loop *L = SE.LI->getLoopFor(InsertPt->getParent()); ;
+       L = L->getParentLoop())
+    if (S->isLoopInvariant(L)) {
+      if (!L) break;
+      if (BasicBlock *Preheader = L->getLoopPreheader())
+        InsertPt = Preheader->getTerminator();
+    } else {
+      // If the SCEV is computable at this level, insert it into the header
+      // after the PHIs (and after any other instructions that we've inserted
+      // there) so that it is guaranteed to dominate any user inside the loop.
+      if (L && S->hasComputableLoopEvolution(L))
+        InsertPt = L->getHeader()->getFirstNonPHI();
+      while (isInsertedInstruction(InsertPt)) ++InsertPt;
+      break;
+    }
+  setInsertionPoint(InsertPt);
+
   Value *V = visit(S);
+
+  setInsertionPoint(SaveInsertPt);
   InsertedExpressions[S] = V;
   return V;
 }
@@ -686,6 +695,9 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
                                                     const Type *Ty) {
   assert(Ty->isInteger() && "Can only insert integer induction variables!");
   const SCEV* H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
-                                   SE.getIntegerSCEV(1, Ty), L);
-  return expand(H);
+                                    SE.getIntegerSCEV(1, Ty), L);
+  BasicBlock::iterator SaveInsertPt = getInsertionPoint();
+  Value *V = expandCodeFor(H, 0, L->getHeader()->begin());
+  setInsertionPoint(SaveInsertPt);
+  return V;
 }
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 6c20e7d1407..658b788e0ba 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -104,7 +104,8 @@ namespace {
     void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount);
 
     void RewriteIVExpressions(Loop *L, const Type *LargestType,
-                              SCEVExpander &Rewriter);
+                              SCEVExpander &Rewriter,
+                              BasicBlock::iterator InsertPt);
 
     void SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter);
 
@@ -170,6 +171,8 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
   }
 
   // Expand the code for the iteration count into the preheader of the loop.
+  assert(RHS->isLoopInvariant(L) &&
+         "Computed iteration count is not loop invariant!");
   BasicBlock *Preheader = L->getLoopPreheader();
   Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(),
                                           Preheader->getTerminator());
@@ -434,10 +437,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
                                         ExitingBlock, BI, Rewriter);
   }
 
-  Rewriter.setInsertionPoint(Header->getFirstNonPHI());
+  BasicBlock::iterator InsertPt = Header->getFirstNonPHI();
 
   // Rewrite IV-derived expressions. Clears the rewriter cache.
-  RewriteIVExpressions(L, LargestType, Rewriter);
+  RewriteIVExpressions(L, LargestType, Rewriter, InsertPt);
 
   // The Rewriter may only be used for isInsertedInstruction queries from this
   // point on.
@@ -462,7 +465,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
 }
 
 void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
-                                          SCEVExpander &Rewriter) {
+                                          SCEVExpander &Rewriter,
+                                          BasicBlock::iterator InsertPt) {
   SmallVector DeadInsts;
 
   // Rewrite all induction variable expressions in terms of the canonical
@@ -488,29 +492,17 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
       // Compute the final addrec to expand into code.
      const SCEV* AR = IU->getReplacementExpr(*UI);
 
-      Value *NewVal = 0;
-      if (AR->isLoopInvariant(L)) {
-        BasicBlock::iterator I = Rewriter.getInsertionPoint();
-        // Expand loop-invariant values in the loop preheader. They will
-        // be sunk to the exit block later, if possible.
-        NewVal =
-          Rewriter.expandCodeFor(AR, UseTy,
-                                 L->getLoopPreheader()->getTerminator());
-        Rewriter.setInsertionPoint(I);
-        ++NumReplaced;
-      } else {
-        // FIXME: It is an extremely bad idea to indvar substitute anything more
-        // complex than affine induction variables. Doing so will put expensive
-        // polynomial evaluations inside of the loop, and the str reduction pass
-        // currently can only reduce affine polynomials. For now just disable
-        // indvar subst on anything more complex than an affine addrec, unless
-        // it can be expanded to a trivial value.
-        if (!Stride->isLoopInvariant(L))
-          continue;
+      // FIXME: It is an extremely bad idea to indvar substitute anything more
+      // complex than affine induction variables. Doing so will put expensive
+      // polynomial evaluations inside of the loop, and the str reduction pass
+      // currently can only reduce affine polynomials. For now just disable
+      // indvar subst on anything more complex than an affine addrec, unless
+      // it can be expanded to a trivial value.
+      if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
+        continue;
 
-        // Now expand it into actual Instructions and patch it into place.
-        NewVal = Rewriter.expandCodeFor(AR, UseTy);
-      }
+      // Now expand it into actual Instructions and patch it into place.
+      Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
 
       // Patch the new value into place.
       if (Op->hasName())
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 312b957948b..a877c4ea03c 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -409,16 +409,8 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase,
 
   const SCEV* NewValSCEV = SE->getUnknown(Base);
 
-  // If there is no immediate value, skip the next part.
-  if (!Imm->isZero()) {
-    // If we are inserting the base and imm values in the same block, make sure
-    // to adjust the IP position if insertion reused a result.
-    if (IP == BaseInsertPt)
-      IP = Rewriter.getInsertionPoint();
-
-    // Always emit the immediate (if non-zero) into the same block as the user.
-    NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
-  }
+  // Always emit the immediate into the same block as the user.
+  NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
 
   return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
 }
diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll
index 62382c6d78c..ca6204c101e 100644
--- a/test/CodeGen/X86/pr3495.ll
+++ b/test/CodeGen/X86/pr3495.ll
@@ -1,6 +1,6 @@
 ; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of reloads omited} | grep 2
 ; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep {Number of available reloads turned into copies}
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 38
+; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 39
 ; PR3495
 ; The loop reversal kicks in once here, resulting in one fewer instruction.
 