mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	Generalize IVUsers to track arbitrary expressions rather than expressions
explicitly split into stride-and-offset pairs. Also, add the ability to track multiple post-increment loops on the same expression. This refines the concept of "normalizing" SCEV expressions used for post-increment uses, and introduces a dedicated utility routine for normalizing and denormalizing expressions. This fixes the expansion of expressions which are post-increment users of more than one loop at a time. More broadly, this takes LSR another step closer to being able to reason about more than one loop at a time. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100699 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -454,6 +454,46 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { | ||||
|   return Changed; | ||||
| } | ||||
|  | ||||
| // isSafe - Return true if indvar substitution may rewrite S for loop L. | ||||
| // FIXME: It is an extremely bad idea to indvar substitute anything more complex | ||||
| // than affine induction variables.  Doing so will put expensive polynomial | ||||
| // evaluations inside of the loop, and the strength reduction pass currently can | ||||
| // only reduce affine polynomials.  For now just disable indvar subst on anything | ||||
| // more complex than an affine addrec, unless it can be expanded to a trivial value. | ||||
| static bool isSafe(const SCEV *S, const Loop *L) { | ||||
|   // Loop-invariant values are safe. | ||||
|   if (S->isLoopInvariant(L)) return true; | ||||
|  | ||||
|   // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how | ||||
|   // to transform them into efficient code. | ||||
|   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) | ||||
|     return AR->isAffine(); | ||||
|  | ||||
|   // A commutative expression is safe if all its operands are safe. | ||||
|   if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) { | ||||
|     for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), | ||||
|          E = Commutative->op_end(); I != E; ++I) | ||||
|       if (!isSafe(*I, L)) return false; | ||||
|     return true; | ||||
|   } | ||||
|    | ||||
|   // A cast is safe if its operand is. | ||||
|   if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) | ||||
|     return isSafe(C->getOperand(), L); | ||||
|  | ||||
|   // A udiv is safe if its operands are. | ||||
|   if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S)) | ||||
|     return isSafe(UD->getLHS(), L) && | ||||
|            isSafe(UD->getRHS(), L); | ||||
|  | ||||
|   // SCEVUnknown is always safe. | ||||
|   if (isa<SCEVUnknown>(S)) | ||||
|     return true; | ||||
|  | ||||
|   // Nothing else is safe. | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { | ||||
|   SmallVector<WeakVH, 16> DeadInsts; | ||||
|  | ||||
| @@ -465,7 +505,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { | ||||
|   // the need for the code evaluation methods to insert induction variables | ||||
|   // of different sizes. | ||||
|   for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { | ||||
|     const SCEV *Stride = UI->getStride(); | ||||
|     Value *Op = UI->getOperandValToReplace(); | ||||
|     const Type *UseTy = Op->getType(); | ||||
|     Instruction *User = UI->getUser(); | ||||
| @@ -486,7 +525,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { | ||||
|     // currently can only reduce affine polynomials.  For now just disable | ||||
|     // indvar subst on anything more complex than an affine addrec, unless | ||||
|     // it can be expanded to a trivial value. | ||||
|     if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) | ||||
|     if (!isSafe(AR, L)) | ||||
|       continue; | ||||
|  | ||||
|     // Determine the insertion point for this user. By default, insert | ||||
|   | ||||
| @@ -781,10 +781,10 @@ struct LSRFixup { | ||||
|   /// will be replaced. | ||||
|   Value *OperandValToReplace; | ||||
|  | ||||
|   /// PostIncLoop - If this user is to use the post-incremented value of an | ||||
|   /// PostIncLoops - If this user is to use the post-incremented value of an | ||||
|   /// induction variable, this variable is non-null and holds the loop | ||||
|   /// associated with the induction variable. | ||||
|   const Loop *PostIncLoop; | ||||
|   PostIncLoopSet PostIncLoops; | ||||
|  | ||||
|   /// LUIdx - The index of the LSRUse describing the expression which | ||||
|   /// this fixup needs, minus an offset (below). | ||||
| @@ -795,6 +795,8 @@ struct LSRFixup { | ||||
|   /// offsets, for example in an unrolled loop. | ||||
|   int64_t Offset; | ||||
|  | ||||
|   bool isUseFullyOutsideLoop(const Loop *L) const; | ||||
|  | ||||
|   LSRFixup(); | ||||
|  | ||||
|   void print(raw_ostream &OS) const; | ||||
| @@ -804,9 +806,24 @@ struct LSRFixup { | ||||
| } | ||||
|  | ||||
| LSRFixup::LSRFixup() | ||||
|   : UserInst(0), OperandValToReplace(0), PostIncLoop(0), | ||||
|   : UserInst(0), OperandValToReplace(0), | ||||
|     LUIdx(~size_t(0)), Offset(0) {} | ||||
|  | ||||
| /// isUseFullyOutsideLoop - Test whether this fixup always uses its value outside | ||||
| /// of the given loop. A PHI user is judged by its incoming blocks, not its own. | ||||
| bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const { | ||||
|   // A PHI uses its value in the incoming blocks; any such block inside L is an inside use. | ||||
|   if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) { | ||||
|     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) | ||||
|       if (PN->getIncomingValue(i) == OperandValToReplace && | ||||
|           L->contains(PN->getIncomingBlock(i))) | ||||
|         return false; | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   return !L->contains(UserInst); | ||||
| } | ||||
|  | ||||
| void LSRFixup::print(raw_ostream &OS) const { | ||||
|   OS << "UserInst="; | ||||
|   // Store is common and interesting enough to be worth special-casing. | ||||
| @@ -821,9 +838,10 @@ void LSRFixup::print(raw_ostream &OS) const { | ||||
|   OS << ", OperandValToReplace="; | ||||
|   WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false); | ||||
|  | ||||
|   if (PostIncLoop) { | ||||
|   for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(), | ||||
|        E = PostIncLoops.end(); I != E; ++I) { | ||||
|     OS << ", PostIncLoop="; | ||||
|     WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false); | ||||
|     WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false); | ||||
|   } | ||||
|  | ||||
|   if (LUIdx != ~size_t(0)) | ||||
| @@ -1545,8 +1563,9 @@ LSRInstance::OptimizeLoopTermCond() { | ||||
|             !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { | ||||
|           // Conservatively assume there may be reuse if the quotient of their | ||||
|           // strides could be a legal scale. | ||||
|           const SCEV *A = CondUse->getStride(); | ||||
|           const SCEV *B = UI->getStride(); | ||||
|           const SCEV *A = CondUse->getStride(L); | ||||
|           const SCEV *B = UI->getStride(L); | ||||
|           if (!A || !B) continue; | ||||
|           if (SE.getTypeSizeInBits(A->getType()) != | ||||
|               SE.getTypeSizeInBits(B->getType())) { | ||||
|             if (SE.getTypeSizeInBits(A->getType()) > | ||||
| @@ -1598,7 +1617,7 @@ LSRInstance::OptimizeLoopTermCond() { | ||||
|         ExitingBlock->getInstList().insert(TermBr, Cond); | ||||
|  | ||||
|         // Clone the IVUse, as the old use still exists! | ||||
|         CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(), | ||||
|         CondUse = &IU.AddUser(CondUse->getExpr(), | ||||
|                               Cond, CondUse->getOperandValToReplace()); | ||||
|         TermBr->replaceUsesOfWith(OldCond, Cond); | ||||
|       } | ||||
| @@ -1607,9 +1626,7 @@ LSRInstance::OptimizeLoopTermCond() { | ||||
|     // If we get to here, we know that we can transform the setcc instruction to | ||||
|     // use the post-incremented version of the IV, allowing us to coalesce the | ||||
|     // live ranges for the IV correctly. | ||||
|     CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), | ||||
|                                        CondUse->getStride())); | ||||
|     CondUse->setIsUseOfPostIncrementedValue(true); | ||||
|     CondUse->transformToPostInc(L); | ||||
|     Changed = true; | ||||
|  | ||||
|     PostIncs.insert(Cond); | ||||
| @@ -1717,19 +1734,24 @@ void LSRInstance::CollectInterestingTypesAndFactors() { | ||||
|   SmallSetVector<const SCEV *, 4> Strides; | ||||
|  | ||||
|   // Collect interesting types and strides. | ||||
|   SmallVector<const SCEV *, 4> Worklist; | ||||
|   for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { | ||||
|     const SCEV *Stride = UI->getStride(); | ||||
|     const SCEV *Expr = UI->getExpr(); | ||||
|  | ||||
|     // Collect interesting types. | ||||
|     Types.insert(SE.getEffectiveSCEVType(Stride->getType())); | ||||
|     Types.insert(SE.getEffectiveSCEVType(Expr->getType())); | ||||
|  | ||||
|     // Add the stride for this loop. | ||||
|     Strides.insert(Stride); | ||||
|  | ||||
|     // Add strides for other mentioned loops. | ||||
|     for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset()); | ||||
|          AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart())) | ||||
|       Strides.insert(AR->getStepRecurrence(SE)); | ||||
|     // Add strides for mentioned loops. | ||||
|     Worklist.push_back(Expr); | ||||
|     do { | ||||
|       const SCEV *S = Worklist.pop_back_val(); | ||||
|       if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { | ||||
|         Strides.insert(AR->getStepRecurrence(SE)); | ||||
|         Worklist.push_back(AR->getStart()); | ||||
|       } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { | ||||
|         Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end()); | ||||
|       } | ||||
|     } while (!Worklist.empty()); | ||||
|   } | ||||
|  | ||||
|   // Compute interesting factors from the set of interesting strides. | ||||
| @@ -1776,8 +1798,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { | ||||
|     LSRFixup &LF = getNewFixup(); | ||||
|     LF.UserInst = UI->getUser(); | ||||
|     LF.OperandValToReplace = UI->getOperandValToReplace(); | ||||
|     if (UI->isUseOfPostIncrementedValue()) | ||||
|       LF.PostIncLoop = L; | ||||
|     LF.PostIncLoops = UI->getPostIncLoops(); | ||||
|  | ||||
|     LSRUse::KindType Kind = LSRUse::Basic; | ||||
|     const Type *AccessTy = 0; | ||||
| @@ -1786,7 +1807,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { | ||||
|       AccessTy = getAccessType(LF.UserInst); | ||||
|     } | ||||
|  | ||||
|     const SCEV *S = IU.getCanonicalExpr(*UI); | ||||
|     const SCEV *S = UI->getExpr(); | ||||
|  | ||||
|     // Equality (== and !=) ICmps are special. We can rewrite (i == N) as | ||||
|     // (N - i == 0), and this allows (N - i) to be the expression that we work | ||||
| @@ -1824,7 +1845,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { | ||||
|     LF.LUIdx = P.first; | ||||
|     LF.Offset = P.second; | ||||
|     LSRUse &LU = Uses[LF.LUIdx]; | ||||
|     LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst); | ||||
|     LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); | ||||
|  | ||||
|     // If this is the first use of this LSRUse, give it a formula. | ||||
|     if (LU.Formulae.empty()) { | ||||
| @@ -1936,7 +1957,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { | ||||
|         LF.LUIdx = P.first; | ||||
|         LF.Offset = P.second; | ||||
|         LSRUse &LU = Uses[LF.LUIdx]; | ||||
|         LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst); | ||||
|         LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); | ||||
|         InsertSupplementalFormula(U, LU, LF.LUIdx); | ||||
|         CountRegisters(LU.Formulae.back(), Uses.size() - 1); | ||||
|         break; | ||||
| @@ -2783,8 +2804,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF, | ||||
|                            SmallVectorImpl<WeakVH> &DeadInsts) const { | ||||
|   const LSRUse &LU = Uses[LF.LUIdx]; | ||||
|  | ||||
|   // Then, collect some instructions which we will remain dominated by when | ||||
|   // expanding the replacement. These must be dominated by any operands that | ||||
|   // Then, collect some instructions which must be dominated by the | ||||
|   // expanding replacement. These must be dominated by any operands that | ||||
|   // will be required in the expansion. | ||||
|   SmallVector<Instruction *, 4> Inputs; | ||||
|   if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace)) | ||||
| @@ -2793,8 +2814,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF, | ||||
|     if (Instruction *I = | ||||
|           dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1))) | ||||
|       Inputs.push_back(I); | ||||
|   if (LF.PostIncLoop) { | ||||
|     if (!L->contains(LF.UserInst)) | ||||
|   if (LF.PostIncLoops.count(L)) { | ||||
|     if (LF.isUseFullyOutsideLoop(L)) | ||||
|       Inputs.push_back(L->getLoopLatch()->getTerminator()); | ||||
|     else | ||||
|       Inputs.push_back(IVIncInsertPos); | ||||
| @@ -2831,7 +2852,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, | ||||
|  | ||||
|   // Inform the Rewriter if we have a post-increment use, so that it can | ||||
|   // perform an advantageous expansion. | ||||
|   Rewriter.setPostInc(LF.PostIncLoop); | ||||
|   Rewriter.setPostInc(LF.PostIncLoops); | ||||
|  | ||||
|   // This is the type that the user actually needs. | ||||
|   const Type *OpTy = LF.OperandValToReplace->getType(); | ||||
| @@ -2855,24 +2876,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, | ||||
|     const SCEV *Reg = *I; | ||||
|     assert(!Reg->isZero() && "Zero allocated in a base register!"); | ||||
|  | ||||
|     // If we're expanding for a post-inc user for the add-rec's loop, make the | ||||
|     // post-inc adjustment. | ||||
|     const SCEV *Start = Reg; | ||||
|     while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) { | ||||
|       if (AR->getLoop() == LF.PostIncLoop) { | ||||
|         Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE)); | ||||
|         // If the user is inside the loop, insert the code after the increment | ||||
|         // so that it is dominated by its operand. If the original insert point | ||||
|         // was already dominated by the increment, keep it, because there may | ||||
|         // be loop-variant operands that need to be respected also. | ||||
|         if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) { | ||||
|           IP = IVIncInsertPos; | ||||
|           while (isa<DbgInfoIntrinsic>(IP)) ++IP; | ||||
|         } | ||||
|         break; | ||||
|       } | ||||
|       Start = AR->getStart(); | ||||
|     } | ||||
|     // If we're expanding for a post-inc user, make the post-inc adjustment. | ||||
|     PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); | ||||
|     Reg = TransformForPostIncUse(Denormalize, Reg, | ||||
|                                  LF.UserInst, LF.OperandValToReplace, | ||||
|                                  Loops, SE, DT); | ||||
|  | ||||
|     Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); | ||||
|   } | ||||
| @@ -2889,11 +2897,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, | ||||
|   if (F.AM.Scale != 0) { | ||||
|     const SCEV *ScaledS = F.ScaledReg; | ||||
|  | ||||
|     // If we're expanding for a post-inc user for the add-rec's loop, make the | ||||
|     // post-inc adjustment. | ||||
|     if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS)) | ||||
|       if (AR->getLoop() == LF.PostIncLoop) | ||||
|         ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE)); | ||||
|     // If we're expanding for a post-inc user, make the post-inc adjustment. | ||||
|     PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); | ||||
|     ScaledS = TransformForPostIncUse(Denormalize, ScaledS, | ||||
|                                      LF.UserInst, LF.OperandValToReplace, | ||||
|                                      Loops, SE, DT); | ||||
|  | ||||
|     if (LU.Kind == LSRUse::ICmpZero) { | ||||
|       // An interesting way of "folding" with an icmp is to use a negated | ||||
| @@ -2954,7 +2962,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, | ||||
|   Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP); | ||||
|  | ||||
|   // We're done expanding now, so reset the rewriter. | ||||
|   Rewriter.setPostInc(0); | ||||
|   Rewriter.clearPostInc(); | ||||
|  | ||||
|   // An ICmpZero Formula represents an ICmp which we're handling as a | ||||
|   // comparison against zero. Now that we've expanded an expression for that | ||||
|   | ||||
		Reference in New Issue
	
	Block a user