diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 9f142a6b357..a5e5ffec695 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -317,7 +317,82 @@ struct FindConstantPointers { } bool isDone() const { return !IndexIsConstant; } }; +} // End anonymous namespace. +namespace { +/// \brief Struct to represent a GEP whose start and step are known fixed +/// offsets from a base address due to SCEV's analysis. +struct SCEVGEPDescriptor { + Value *BaseAddr; + unsigned Start; + unsigned Step; +}; +} // End anonymous namespace. + +/// \brief Build a cache of all the GEP instructions which SCEV can describe. +/// +/// Visit all GEPs in the loop and find those which after complete loop +/// unrolling would become a constant, or BaseAddress+Constant. For those where +/// we can identify small constant starts and steps from a base address, return +/// a map from the GEP to the base, start, and step relevant for that GEP. This +/// is essentially a simplified and fast to query form of the SCEV analysis +/// which we can afford to look into repeatedly for different iterations of the +/// loop. +static SmallDenseMap +buildSCEVGEPCache(const Loop &L, ScalarEvolution &SE) { + SmallDenseMap Cache; + + for (auto BB : L.getBlocks()) { + for (Instruction &I : *BB) { + if (GetElementPtrInst *GEP = dyn_cast(&I)) { + Value *V = cast(GEP); + if (!SE.isSCEVable(V->getType())) + continue; + const SCEV *S = SE.getSCEV(V); + // FIXME: Hoist the initialization out of the loop. + FindConstantPointers Visitor(&L, SE); + SCEVTraversal T(Visitor); + // Try to find (BaseAddress+Step+Offset) tuple. + // If succeeded, save it to the cache - it might help in folding + // loads. + T.visitAll(S); + if (!Visitor.IndexIsConstant || !Visitor.BaseAddress) + continue; + + const SCEV *BaseAddrSE = SE.getSCEV(Visitor.BaseAddress); + if (BaseAddrSE->getType() != S->getType()) + continue; + const SCEV *OffSE = SE.getMinusSCEV(S, BaseAddrSE); + const SCEVAddRecExpr *AR = dyn_cast(OffSE); + + if (!AR) + continue; + + const SCEVConstant *StepSE = + dyn_cast(AR->getStepRecurrence(SE)); + const SCEVConstant *StartSE = dyn_cast(AR->getStart()); + if (!StepSE || !StartSE) + continue; + + // Check and skip caching if doing so would require lots of bits to + // avoid overflow. + APInt Start = StartSE->getValue()->getValue(); + APInt Step = StepSE->getValue()->getValue(); + if (Start.getActiveBits() > 32 || Step.getActiveBits() > 32) + continue; + + // We found a cacheable SCEV model for the GEP. + Cache[V] = {Visitor.BaseAddress, + (unsigned)Start.getLimitedValue(), + (unsigned)Step.getLimitedValue()}; + } + } + } + + return Cache; +} + +namespace { // This class is used to get an estimate of the optimization effects that we // could get from complete loop unrolling. It comes from the fact that some // loads might be replaced with concrete constant values and that could trigger @@ -338,12 +413,6 @@ class UnrollAnalyzer : public InstVisitor { typedef InstVisitor Base; friend class InstVisitor; - struct SCEVGEPDescriptor { - Value *BaseAddr; - unsigned Start; - unsigned Step; - }; - /// \brief The loop we're going to analyze. const Loop *L; @@ -364,13 +433,13 @@ class UnrollAnalyzer : public InstVisitor { // To avoid requesting SCEV info on every iteration, request it once, and // for each value that would become ConstAddress+Constant after loop // unrolling, save the corresponding data. - SmallDenseMap SCEVCache; + SmallDenseMap SCEVGEPCache; /// \brief Number of currently simulated iteration. /// /// If an expression is ConstAddress+Constant, then the Constant is /// Start + Iteration*Step, where Start and Step could be obtained from - /// SCEVCache. + /// SCEVGEPCache. unsigned Iteration; /// \brief Upper threshold for complete unrolling. @@ -415,8 +484,8 @@ class UnrollAnalyzer : public InstVisitor { if (Constant *SimplifiedAddrOp = SimplifiedValues.lookup(AddrOp)) AddrOp = SimplifiedAddrOp; - auto It = SCEVCache.find(AddrOp); - if (It == SCEVCache.end()) + auto It = SCEVGEPCache.find(AddrOp); + if (It == SCEVGEPCache.end()) return false; SCEVGEPDescriptor GEPDesc = It->second; @@ -451,61 +520,6 @@ class UnrollAnalyzer : public InstVisitor { return true; } - /// Visit all GEPs in the loop and find those which after complete loop - /// unrolling would become a constant, or BaseAddress+Constant. - /// - /// Such GEPs could allow to evaluate a load to a constant later - for now we - /// just store the corresponding BaseAddress and StartValue with StepValue in - /// the SCEVCache. - void cacheSCEVResults() { - for (auto BB : L->getBlocks()) { - for (Instruction &I : *BB) { - if (GetElementPtrInst *GEP = dyn_cast(&I)) { - Value *V = cast(GEP); - if (!SE.isSCEVable(V->getType())) - continue; - const SCEV *S = SE.getSCEV(V); - // FIXME: Hoist the initialization out of the loop. - FindConstantPointers Visitor(L, SE); - SCEVTraversal T(Visitor); - // Try to find (BaseAddress+Step+Offset) tuple. - // If succeeded, save it to the cache - it might help in folding - // loads. - T.visitAll(S); - if (!Visitor.IndexIsConstant || !Visitor.BaseAddress) - continue; - - const SCEV *BaseAddrSE = SE.getSCEV(Visitor.BaseAddress); - if (BaseAddrSE->getType() != S->getType()) - continue; - const SCEV *OffSE = SE.getMinusSCEV(S, BaseAddrSE); - const SCEVAddRecExpr *AR = dyn_cast(OffSE); - - if (!AR) - continue; - - const SCEVConstant *StepSE = - dyn_cast(AR->getStepRecurrence(SE)); - const SCEVConstant *StartSE = dyn_cast(AR->getStart()); - if (!StepSE || !StartSE) - continue; - - // Check and skip caching if doing so would require lots of bits to - // avoid overflow. - APInt Start = StartSE->getValue()->getValue(); - APInt Step = StepSE->getValue()->getValue(); - if (Start.getActiveBits() > 32 || Step.getActiveBits() > 32) - continue; - - // We found a cacheable SCEV model for the GEP. - SCEVCache[V] = {Visitor.BaseAddress, - (unsigned)Start.getLimitedValue(), - (unsigned)Step.getLimitedValue()}; - } - } - } - } - public: UnrollAnalyzer(const Loop *L, unsigned TripCount, ScalarEvolution &SE, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) @@ -546,8 +560,8 @@ public: return false; // To avoid compute SCEV-expressions on every iteration, compute them once - // and store interesting to us in SCEVCache. - cacheSCEVResults(); + // and store interesting to us in SCEVGEPCache. + SCEVGEPCache = buildSCEVGEPCache(*L, SE); // Simulate execution of each iteration of the loop counting instructions, // which would be simplified.