mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-31 08:16:47 +00:00 
			
		
		
		
	[Unroll] Switch from an eagerly populated SCEV cache to one that is
lazily built.

Also, make it a much more generic SCEV cache, which today exposes only a reduced GEP model description but could be extended in the future to do other profitable caching of SCEV information.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238124 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -320,81 +320,110 @@ struct FindConstantPointers { | |||||||
| } // End anonymous namespace. | } // End anonymous namespace. | ||||||
|  |  | ||||||
| namespace { | namespace { | ||||||
| /// \brief Struct to represent a GEP whose start and step are known fixed | /// \brief A cache of SCEV results used to optimize repeated queries to SCEV on | ||||||
| /// offsets from a base address due to SCEV's analysis. | /// the same set of instructions. | ||||||
| struct SCEVGEPDescriptor { | /// | ||||||
|   Value *BaseAddr; | /// The primary cost this saves is the cost of checking the validity of a SCEV | ||||||
|   unsigned Start; | /// every time it is looked up. However, in some cases we can provide a reduced | ||||||
|   unsigned Step; | /// and especially useful model for an instruction based upon SCEV that is | ||||||
|  | /// non-trivial to compute but more useful to clients. | ||||||
|  | class SCEVCache { | ||||||
|  | public: | ||||||
|  |   /// \brief Struct to represent a GEP whose start and step are known fixed | ||||||
|  |   /// offsets from a base address due to SCEV's analysis. | ||||||
|  |   struct GEPDescriptor { | ||||||
|  |     Value *BaseAddr = nullptr; | ||||||
|  |     unsigned Start = 0; | ||||||
|  |     unsigned Step = 0; | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   Optional<GEPDescriptor> getGEPDescriptor(GetElementPtrInst *GEP); | ||||||
|  |  | ||||||
|  |   SCEVCache(const Loop &L, ScalarEvolution &SE) : L(L), SE(SE) {} | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |   const Loop &L; | ||||||
|  |   ScalarEvolution &SE; | ||||||
|  |  | ||||||
|  |   SmallDenseMap<GetElementPtrInst *, GEPDescriptor> GEPDescriptors; | ||||||
| }; | }; | ||||||
| } // End anonymous namespace. | } // End anonymous namespace. | ||||||
|  |  | ||||||
| /// \brief Build a cache of all the GEP instructions which SCEV can describe. | /// \brief Get a simplified descriptor for a GEP instruction. | ||||||
| /// | /// | ||||||
| /// Visit all GEPs in the loop and find those which after complete loop | /// Where possible, this produces a simplified descriptor for a GEP instruction | ||||||
| /// unrolling would become a constant, or BaseAddress+Constant. For those where | /// using SCEV analysis of the containing loop. If this isn't possible, it | ||||||
| /// we can identify small constant starts and steps from a base address, return | /// returns an empty optional. | ||||||
| /// a map from the GEP to the base, start, and step relevant for that GEP. This | /// | ||||||
| /// is essentially a simplified and fast to query form of the SCEV analysis | /// The model is a base address, an initial offset, and a per-iteration step. | ||||||
| /// which we can afford to look into repeatedly for different iterations of the | /// This fits very common patterns of GEPs inside loops and is something we can | ||||||
| /// loop. | /// use to simulate the behavior of a particular iteration of a loop. | ||||||
| static SmallDenseMap<Value *, SCEVGEPDescriptor> | /// | ||||||
| buildSCEVGEPCache(const Loop &L, ScalarEvolution &SE) { | /// This is a cached interface. The first call may do non-trivial work to | ||||||
|   SmallDenseMap<Value *, SCEVGEPDescriptor> Cache; | /// compute the result, but all subsequent calls will return a fast answer | ||||||
|  | /// based on a cached result. This includes caching negative results. | ||||||
|  | Optional<SCEVCache::GEPDescriptor> | ||||||
|  | SCEVCache::getGEPDescriptor(GetElementPtrInst *GEP) { | ||||||
|  |   decltype(GEPDescriptors)::iterator It; | ||||||
|  |   bool Inserted; | ||||||
|  |  | ||||||
|   for (auto BB : L.getBlocks()) { |   std::tie(It, Inserted) = GEPDescriptors.insert({GEP, {}}); | ||||||
|     for (Instruction &I : *BB) { |  | ||||||
|       if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) { |  | ||||||
|         Value *V = cast<Value>(GEP); |  | ||||||
|         if (!SE.isSCEVable(V->getType())) |  | ||||||
|             continue; |  | ||||||
|         const SCEV *S = SE.getSCEV(V); |  | ||||||
|  |  | ||||||
|         // FIXME: It'd be nice if the worklist and set used by the |   if (!Inserted) { | ||||||
|         // SCEVTraversal could be re-used between loop iterations, but the |     if (!It->second.BaseAddr) | ||||||
|         // interface doesn't support that. There is no way to clear the visited |       return None; | ||||||
|         // sets between uses. |  | ||||||
|         FindConstantPointers Visitor(&L, SE); |  | ||||||
|         SCEVTraversal<FindConstantPointers> T(Visitor); |  | ||||||
|  |  | ||||||
|         // Try to find (BaseAddress+Step+Offset) tuple. |     return It->second; | ||||||
|         // If succeeded, save it to the cache - it might help in folding |  | ||||||
|         // loads. |  | ||||||
|         T.visitAll(S); |  | ||||||
|         if (!Visitor.IndexIsConstant || !Visitor.BaseAddress) |  | ||||||
|           continue; |  | ||||||
|  |  | ||||||
|         const SCEV *BaseAddrSE = SE.getSCEV(Visitor.BaseAddress); |  | ||||||
|         if (BaseAddrSE->getType() != S->getType()) |  | ||||||
|           continue; |  | ||||||
|         const SCEV *OffSE = SE.getMinusSCEV(S, BaseAddrSE); |  | ||||||
|         const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OffSE); |  | ||||||
|  |  | ||||||
|         if (!AR) |  | ||||||
|           continue; |  | ||||||
|  |  | ||||||
|         const SCEVConstant *StepSE = |  | ||||||
|             dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE)); |  | ||||||
|         const SCEVConstant *StartSE = dyn_cast<SCEVConstant>(AR->getStart()); |  | ||||||
|         if (!StepSE || !StartSE) |  | ||||||
|           continue; |  | ||||||
|  |  | ||||||
|         // Check and skip caching if doing so would require lots of bits to |  | ||||||
|         // avoid overflow. |  | ||||||
|         APInt Start = StartSE->getValue()->getValue(); |  | ||||||
|         APInt Step = StepSE->getValue()->getValue(); |  | ||||||
|         if (Start.getActiveBits() > 32 || Step.getActiveBits() > 32) |  | ||||||
|           continue; |  | ||||||
|  |  | ||||||
|         // We found a cacheable SCEV model for the GEP. |  | ||||||
|         Cache[V] = {Visitor.BaseAddress, |  | ||||||
|                     (unsigned)Start.getLimitedValue(), |  | ||||||
|                     (unsigned)Step.getLimitedValue()}; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   return Cache; |   // We've inserted a new record into the cache, so compute the GEP descriptor | ||||||
|  |   // if possible. | ||||||
|  |   Value *V = cast<Value>(GEP); | ||||||
|  |   if (!SE.isSCEVable(V->getType())) | ||||||
|  |     return None; | ||||||
|  |   const SCEV *S = SE.getSCEV(V); | ||||||
|  |  | ||||||
|  |   // FIXME: It'd be nice if the worklist and set used by the | ||||||
|  |   // SCEVTraversal could be re-used between loop iterations, but the | ||||||
|  |   // interface doesn't support that. There is no way to clear the visited | ||||||
|  |   // sets between uses. | ||||||
|  |   FindConstantPointers Visitor(&L, SE); | ||||||
|  |   SCEVTraversal<FindConstantPointers> T(Visitor); | ||||||
|  |  | ||||||
|  |   // Try to find (BaseAddress+Step+Offset) tuple. | ||||||
|  |   // If succeeded, save it to the cache - it might help in folding | ||||||
|  |   // loads. | ||||||
|  |   T.visitAll(S); | ||||||
|  |   if (!Visitor.IndexIsConstant || !Visitor.BaseAddress) | ||||||
|  |     return None; | ||||||
|  |  | ||||||
|  |   const SCEV *BaseAddrSE = SE.getSCEV(Visitor.BaseAddress); | ||||||
|  |   if (BaseAddrSE->getType() != S->getType()) | ||||||
|  |     return None; | ||||||
|  |   const SCEV *OffSE = SE.getMinusSCEV(S, BaseAddrSE); | ||||||
|  |   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OffSE); | ||||||
|  |  | ||||||
|  |   if (!AR) | ||||||
|  |     return None; | ||||||
|  |  | ||||||
|  |   const SCEVConstant *StepSE = | ||||||
|  |       dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE)); | ||||||
|  |   const SCEVConstant *StartSE = dyn_cast<SCEVConstant>(AR->getStart()); | ||||||
|  |   if (!StepSE || !StartSE) | ||||||
|  |     return None; | ||||||
|  |  | ||||||
|  |   // Check and skip caching if doing so would require lots of bits to | ||||||
|  |   // avoid overflow. | ||||||
|  |   APInt Start = StartSE->getValue()->getValue(); | ||||||
|  |   APInt Step = StepSE->getValue()->getValue(); | ||||||
|  |   if (Start.getActiveBits() > 32 || Step.getActiveBits() > 32) | ||||||
|  |     return None; | ||||||
|  |  | ||||||
|  |   // We found a cacheable SCEV model for the GEP. | ||||||
|  |   It->second.BaseAddr = Visitor.BaseAddress; | ||||||
|  |   It->second.Start = Start.getLimitedValue(); | ||||||
|  |   It->second.Step = Step.getLimitedValue(); | ||||||
|  |   return It->second; | ||||||
| } | } | ||||||
|  |  | ||||||
| namespace { | namespace { | ||||||
| @@ -421,9 +450,8 @@ class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> { | |||||||
| public: | public: | ||||||
|   UnrolledInstAnalyzer(unsigned Iteration, |   UnrolledInstAnalyzer(unsigned Iteration, | ||||||
|                        DenseMap<Value *, Constant *> &SimplifiedValues, |                        DenseMap<Value *, Constant *> &SimplifiedValues, | ||||||
|                        SmallDenseMap<Value *, SCEVGEPDescriptor> &SCEVGEPCache) |                        SCEVCache &SC) | ||||||
|       : Iteration(Iteration), SimplifiedValues(SimplifiedValues), |       : Iteration(Iteration), SimplifiedValues(SimplifiedValues), SC(SC) {} | ||||||
|         SCEVGEPCache(SCEVGEPCache) {} |  | ||||||
|  |  | ||||||
|   // Allow access to the initial visit method. |   // Allow access to the initial visit method. | ||||||
|   using Base::visit; |   using Base::visit; | ||||||
| @@ -443,10 +471,8 @@ private: | |||||||
|   // post-unrolling. |   // post-unrolling. | ||||||
|   DenseMap<Value *, Constant *> &SimplifiedValues; |   DenseMap<Value *, Constant *> &SimplifiedValues; | ||||||
|  |  | ||||||
|   // To avoid requesting SCEV info on every iteration, request it once, and |   // We use a cache to wrap all our SCEV queries. | ||||||
|   // for each value that would become ConstAddress+Constant after loop |   SCEVCache &SC; | ||||||
|   // unrolling, save the corresponding data. |  | ||||||
|   SmallDenseMap<Value *, SCEVGEPDescriptor> &SCEVGEPCache; |  | ||||||
|  |  | ||||||
|   /// Base case for the instruction visitor. |   /// Base case for the instruction visitor. | ||||||
|   bool visitInstruction(Instruction &I) { return false; }; |   bool visitInstruction(Instruction &I) { return false; }; | ||||||
| @@ -487,12 +513,14 @@ private: | |||||||
|       if (Constant *SimplifiedAddrOp = SimplifiedValues.lookup(AddrOp)) |       if (Constant *SimplifiedAddrOp = SimplifiedValues.lookup(AddrOp)) | ||||||
|         AddrOp = SimplifiedAddrOp; |         AddrOp = SimplifiedAddrOp; | ||||||
|  |  | ||||||
|     auto It = SCEVGEPCache.find(AddrOp); |     auto *GEP = dyn_cast<GetElementPtrInst>(AddrOp); | ||||||
|     if (It == SCEVGEPCache.end()) |     if (!GEP) | ||||||
|  |       return false; | ||||||
|  |     auto OptionalGEPDesc = SC.getGEPDescriptor(GEP); | ||||||
|  |     if (!OptionalGEPDesc) | ||||||
|       return false; |       return false; | ||||||
|     SCEVGEPDescriptor GEPDesc = It->second; |  | ||||||
|  |  | ||||||
|     auto GV = dyn_cast<GlobalVariable>(GEPDesc.BaseAddr); |     auto GV = dyn_cast<GlobalVariable>(OptionalGEPDesc->BaseAddr); | ||||||
|     // We're only interested in loads that can be completely folded to a |     // We're only interested in loads that can be completely folded to a | ||||||
|     // constant. |     // constant. | ||||||
|     if (!GV || !GV->hasInitializer()) |     if (!GV || !GV->hasInitializer()) | ||||||
| @@ -507,9 +535,9 @@ private: | |||||||
|     // low and both the start and step are 32-bit integers. We use signed |     // low and both the start and step are 32-bit integers. We use signed | ||||||
|     // integers so that UBSan will catch if a bug sneaks into the code. |     // integers so that UBSan will catch if a bug sneaks into the code. | ||||||
|     int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U; |     int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U; | ||||||
|     int64_t Index = ((int64_t)GEPDesc.Start + |     int64_t Index = ((int64_t)OptionalGEPDesc->Start + | ||||||
|                       (int64_t)GEPDesc.Step * (int64_t)Iteration) / |                      (int64_t)OptionalGEPDesc->Step * (int64_t)Iteration) / | ||||||
|                      ElemSize; |                     ElemSize; | ||||||
|     if (Index >= CDS->getNumElements()) { |     if (Index >= CDS->getNumElements()) { | ||||||
|       // FIXME: For now we conservatively ignore out of bound accesses, but |       // FIXME: For now we conservatively ignore out of bound accesses, but | ||||||
|       // we're allowed to perform the optimization in this case. |       // we're allowed to perform the optimization in this case. | ||||||
| @@ -562,14 +590,13 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, | |||||||
|       TripCount > UnrollMaxIterationsCountToAnalyze) |       TripCount > UnrollMaxIterationsCountToAnalyze) | ||||||
|     return None; |     return None; | ||||||
|  |  | ||||||
|   // To avoid compute SCEV-expressions on every iteration, compute them once |  | ||||||
|   // and store interesting to us in SCEVGEPCache. |  | ||||||
|   SmallDenseMap<Value *, SCEVGEPDescriptor> SCEVGEPCache = |  | ||||||
|       buildSCEVGEPCache(*L, SE); |  | ||||||
|  |  | ||||||
|   SmallSetVector<BasicBlock *, 16> BBWorklist; |   SmallSetVector<BasicBlock *, 16> BBWorklist; | ||||||
|   DenseMap<Value *, Constant *> SimplifiedValues; |   DenseMap<Value *, Constant *> SimplifiedValues; | ||||||
|  |  | ||||||
|  |   // Use a cache to access SCEV expressions so that we don't pay the cost on | ||||||
|  |   // each iteration. This cache is lazily self-populating. | ||||||
|  |   SCEVCache SC(*L, SE); | ||||||
|  |  | ||||||
|   unsigned NumberOfOptimizedInstructions = 0; |   unsigned NumberOfOptimizedInstructions = 0; | ||||||
|   unsigned UnrolledLoopSize = 0; |   unsigned UnrolledLoopSize = 0; | ||||||
|  |  | ||||||
| @@ -579,7 +606,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, | |||||||
|   // we literally have to go through all loop's iterations. |   // we literally have to go through all loop's iterations. | ||||||
|   for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { |   for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { | ||||||
|     SimplifiedValues.clear(); |     SimplifiedValues.clear(); | ||||||
|     UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SCEVGEPCache); |     UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SC); | ||||||
|  |  | ||||||
|     BBWorklist.clear(); |     BBWorklist.clear(); | ||||||
|     BBWorklist.insert(L->getHeader()); |     BBWorklist.insert(L->getHeader()); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user