diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h index fcd9caa61f2..22fbb35cdb9 100644 --- a/include/llvm/Analysis/IVUsers.h +++ b/include/llvm/Analysis/IVUsers.h @@ -175,11 +175,11 @@ class IVUsers : public LoopPass { ScalarEvolution *SE; SmallPtrSet Processed; +public: /// IVUses - A list of all tracked IV uses of induction variable expressions /// we are interested in. ilist IVUses; -public: /// IVUsesByStride - A mapping from the strides in StrideOrder to the /// uses in IVUses. std::map IVUsesByStride; diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 2294e5352cb..7419cdc4440 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -976,6 +976,13 @@ public: void removeBlock(BasicBlock *BB) { LI.removeBlock(BB); } + + static bool isNotAlreadyContainedIn(const Loop *SubLoop, + const Loop *ParentLoop) { + return + LoopInfoBase::isNotAlreadyContainedIn(SubLoop, + ParentLoop); + } }; diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 627dbbb3288..37747b65174 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -53,7 +53,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. - if (newLoop->contains(L->getHeader())) + if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop)) return false; } return true; @@ -307,7 +307,6 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) AddUsersIfInteresting(I); - Processed.clear(); return false; } @@ -370,7 +369,7 @@ void IVUsers::dump() const { void IVUsers::releaseMemory() { IVUsesByStride.clear(); StrideOrder.clear(); - IVUses.clear(); + Processed.clear(); } void IVStrideUse::deleted() { diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 297dd316762..cae32d3361c 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -31,6 +31,10 @@ namespace llvm { bool EnableFastISel; } +static cl::opt X1("x1"); +static cl::opt X2("x2"); +static cl::opt X3("x3"); +static cl::opt X4("x4"); static cl::opt DisablePostRA("disable-post-ra", cl::Hidden, cl::desc("Disable Post Regalloc")); static cl::opt DisableBranchFold("disable-branch-fold", cl::Hidden, @@ -239,6 +243,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true)); } + if (X1) + PM.add(createPrintFunctionPass("\n\n" + "*** Before LSR ***\n", + &errs())); + // Run loop strength reduction before anything else. if (OptLevel != CodeGenOpt::None && !DisableLSR) { PM.add(createLoopStrengthReducePass(getTargetLowering())); @@ -246,6 +255,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &errs())); } + if (X2) + PM.add(createPrintFunctionPass("\n\n" + "*** After LSR ***\n", + &errs())); + // Turn exception handling constructs into something the code generators can // handle. switch (getMCAsmInfo()->getExceptionHandlingType()) @@ -268,9 +282,19 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Make sure that no unreachable blocks are instruction selected. PM.add(createUnreachableBlockEliminationPass()); + if (X3) + PM.add(createPrintFunctionPass("\n\n" + "*** Before CGP ***\n", + &errs())); + if (OptLevel != CodeGenOpt::None && !DisableCGP) PM.add(createCodeGenPreparePass(getTargetLowering())); + if (X4) + PM.add(createPrintFunctionPass("\n\n" + "*** After CGP ***\n", + &errs())); + PM.add(createStackProtectorPass(getTargetLowering())); if (PrintISelInput) diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index e4c4ae5f399..372616c8699 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -48,7 +48,7 @@ namespace { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// transformation profitability. const TargetLowering *TLI; - ProfileInfo *PI; + ProfileInfo *PFI; /// BackEdges - Keep a set of all the loop back edges. /// @@ -99,7 +99,7 @@ void CodeGenPrepare::findLoopBackEdges(const Function &F) { bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; - PI = getAnalysisIfAvailable(); + PFI = getAnalysisIfAvailable(); // First pass, eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); @@ -288,9 +288,9 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); - if (PI) { - PI->replaceAllUses(BB, DestBB); - PI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); + if (PFI) { + PFI->replaceAllUses(BB, DestBB); + PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); } BB->eraseFromParent(); @@ -368,9 +368,9 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, // If we found a workable predecessor, change TI to branch to Succ. if (FoundMatch) { - ProfileInfo *PI = P->getAnalysisIfAvailable(); - if (PI) - PI->splitEdge(TIBB, Dest, Pred); + ProfileInfo *PFI = P->getAnalysisIfAvailable(); + if (PFI) + PFI->splitEdge(TIBB, Dest, Pred); Dest->removePredecessor(TIBB); TI->setSuccessor(SuccNum, Pred); return; diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 8c2e9b6ff57..600937693b9 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -24,14 +24,18 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Type.h" #include "llvm/DerivedTypes.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" @@ -81,6 +85,8 @@ namespace { class LoopStrengthReduce : public LoopPass { IVUsers *IU; + LoopInfo *LI; + DominatorTree *DT; ScalarEvolution *SE; bool Changed; @@ -88,6 +94,10 @@ namespace { /// particular stride. std::map IVsByStride; + /// StrideNoReuse - Keep track of all the strides whose ivs cannot be + /// reused (nor should they be rewritten to reuse other strides). + SmallSet StrideNoReuse; + /// DeadInsts - Keep track of instructions we may have made dead, so that /// we can remove them after we are done working. SmallVector DeadInsts; @@ -99,7 +109,8 @@ namespace { public: static char ID; // Pass ID, replacement for typeid explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : - LoopPass(&ID), TLI(tli) {} + LoopPass(&ID), TLI(tli) { + } bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -107,11 +118,13 @@ namespace { // We split critical edges, so we change the CFG. However, we do update // many analyses if they are around. AU.addPreservedID(LoopSimplifyID); - AU.addPreserved("loops"); - AU.addPreserved("domfrontier"); - AU.addPreserved("domtree"); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.addRequiredID(LoopSimplifyID); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -215,17 +228,19 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { if (DeadInsts.empty()) return; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null(DeadInsts.pop_back_val()); + Instruction *I = dyn_cast_or_null(DeadInsts.back()); + DeadInsts.pop_back(); if (I == 0 || !isInstructionTriviallyDead(I)) continue; - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { if (Instruction *U = dyn_cast(*OI)) { *OI = 0; if (U->use_empty()) DeadInsts.push_back(U); } + } I->eraseFromParent(); Changed = true; @@ -285,6 +300,9 @@ namespace { /// BasedUser - For a particular base value, keep information about how we've /// partitioned the expression so far. struct BasedUser { + /// SE - The current ScalarEvolution object. + ScalarEvolution *SE; + /// Base - The Base value for the PHI node that needs to be inserted for /// this use. As the use is processed, information gets moved from this /// field to the Imm field (below). BasedUser values are sorted by this @@ -316,9 +334,9 @@ namespace { bool isUseOfPostIncrementedValue; BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) - : Base(IVSU.getOffset()), Inst(IVSU.getUser()), + : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), OperandValToReplace(IVSU.getOperandValToReplace()), - Imm(se->getIntegerSCEV(0, Base->getType())), + Imm(SE->getIntegerSCEV(0, Base->getType())), isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} // Once we rewrite the code to insert the new IVs we want, update the @@ -327,14 +345,14 @@ namespace { void RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl &DeadInsts, - ScalarEvolution *SE); + LoopInfo &LI, + SmallVectorImpl &DeadInsts); Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP, - ScalarEvolution *SE); + Instruction *IP, Loop *L, + LoopInfo &LI); void dump() const; }; } @@ -348,12 +366,27 @@ void BasedUser::dump() const { Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP, - ScalarEvolution *SE) { - Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP); + Instruction *IP, Loop *L, + LoopInfo &LI) { + // Figure out where we *really* want to insert this code. In particular, if + // the user is inside of a loop that is nested inside of L, we really don't + // want to insert this expression before the user, we'd rather pull it out as + // many loops as possible. + Instruction *BaseInsertPt = IP; + + // Figure out the most-nested loop that IP is in. + Loop *InsertLoop = LI.getLoopFor(IP->getParent()); + + // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out + // the preheader of the outer-most loop where NewBase is not loop invariant. + if (L->contains(IP->getParent())) + while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) { + BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator(); + InsertLoop = InsertLoop->getParentLoop(); + } + + Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt); - // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to - // re-analyze it. const SCEV *NewValSCEV = SE->getUnknown(Base); // Always emit the immediate into the same block as the user. @@ -372,8 +405,8 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl &DeadInsts, - ScalarEvolution *SE) { + LoopInfo &LI, + SmallVectorImpl &DeadInsts) { if (!isa(Inst)) { // By default, insert code at the user instruction. BasicBlock::iterator InsertPt = Inst; @@ -402,7 +435,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, } Value *NewVal = InsertCodeForBaseAtPosition(NewBase, OperandValToReplace->getType(), - Rewriter, InsertPt, SE); + Rewriter, InsertPt, L, LI); // Replace the use of the operand Value with the new Phi we just created. Inst->replaceUsesOfWith(OperandValToReplace, NewVal); @@ -464,7 +497,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, PHIPred->getTerminator() : OldLoc->getParent()->getTerminator(); Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), - Rewriter, InsertPt, SE); + Rewriter, InsertPt, L, LI); DEBUG(errs() << " Changing PHI use to "); DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false)); @@ -940,13 +973,17 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, const SCEV *const &Stride, IVExpr &IV, const Type *Ty, const std::vector& UsersToProcess) { + if (StrideNoReuse.count(Stride)) + return SE->getIntegerSCEV(0, Stride->getType()); + if (const SCEVConstant *SC = dyn_cast(Stride)) { int64_t SInt = SC->getValue()->getSExtValue(); for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { std::map::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa(SI->first)) + if (SI == IVsByStride.end() || !isa(SI->first) || + StrideNoReuse.count(SI->first)) continue; // The other stride has no uses, don't reuse it. std::map::iterator UI = @@ -1705,8 +1742,8 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride, RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV)); User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt, - Rewriter, L, this, - DeadInsts, SE); + Rewriter, L, this, *LI, + DeadInsts); // Mark old value we replaced as possibly dead, so that it is eliminated // if we just replaced the last use of that value. @@ -2670,6 +2707,8 @@ bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { IU = &getAnalysis(); + LI = &getAnalysis(); + DT = &getAnalysis(); SE = &getAnalysis(); Changed = false; @@ -2715,15 +2754,16 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { // After all sharing is done, see if we can adjust the loop to test against // zero instead of counting up to a maximum. This is usually faster. OptimizeLoopCountIV(L); - - // We're done analyzing this loop; release all the state we built up for it. - IVsByStride.clear(); - - // Clean up after ourselves - if (!DeadInsts.empty()) - DeleteTriviallyDeadInstructions(); } + // We're done analyzing this loop; release all the state we built up for it. + IVsByStride.clear(); + StrideNoReuse.clear(); + + // Clean up after ourselves + if (!DeadInsts.empty()) + DeleteTriviallyDeadInstructions(); + // At this point, it is worth checking to see if any recurrence PHIs are also // dead, so that we can remove them as well. DeleteDeadPHIs(L->getHeader());