mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-26 18:20:39 +00:00 
			
		
		
		
	Generalize IVUsers to track arbitrary expressions rather than expressions
explicitly split into stride-and-offset pairs. Also, add the ability to track multiple post-increment loops on the same expression. This refines the concept of "normalizing" SCEV expressions used for post-increment uses, and introduces a dedicated utility routine for normalizing and denormalizing expressions. This fixes the expansion of expressions which are post-increment users of more than one loop at a time. More broadly, this takes LSR another step closer to being able to reason about more than one loop at a time. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100699 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -16,6 +16,7 @@ | |||||||
| #define LLVM_ANALYSIS_IVUSERS_H | #define LLVM_ANALYSIS_IVUSERS_H | ||||||
|  |  | ||||||
| #include "llvm/Analysis/LoopPass.h" | #include "llvm/Analysis/LoopPass.h" | ||||||
|  | #include "llvm/Analysis/ScalarEvolutionNormalization.h" | ||||||
| #include "llvm/Support/ValueHandle.h" | #include "llvm/Support/ValueHandle.h" | ||||||
|  |  | ||||||
| namespace llvm { | namespace llvm { | ||||||
| @@ -26,17 +27,18 @@ class Value; | |||||||
| class IVUsers; | class IVUsers; | ||||||
| class ScalarEvolution; | class ScalarEvolution; | ||||||
| class SCEV; | class SCEV; | ||||||
|  | class IVUsers; | ||||||
|  |  | ||||||
| /// IVStrideUse - Keep track of one use of a strided induction variable. | /// IVStrideUse - Keep track of one use of a strided induction variable. | ||||||
| /// The Expr member keeps track of the expression, User is the actual user | /// The Expr member keeps track of the expression, User is the actual user | ||||||
| /// instruction of the operand, and 'OperandValToReplace' is the operand of | /// instruction of the operand, and 'OperandValToReplace' is the operand of | ||||||
| /// the User that is the use. | /// the User that is the use. | ||||||
| class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> { | class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> { | ||||||
|  |   friend class IVUsers; | ||||||
| public: | public: | ||||||
|   IVStrideUse(IVUsers *P, const SCEV *S, const SCEV *Off, |   IVStrideUse(IVUsers *P, const SCEV *E, | ||||||
|               Instruction* U, Value *O) |               Instruction* U, Value *O) | ||||||
|     : CallbackVH(U), Parent(P), Stride(S), Offset(Off), |     : CallbackVH(U), Parent(P), Expr(E), OperandValToReplace(O) { | ||||||
|       OperandValToReplace(O), IsUseOfPostIncrementedValue(false) { |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   /// getUser - Return the user instruction for this use. |   /// getUser - Return the user instruction for this use. | ||||||
| @@ -53,23 +55,15 @@ public: | |||||||
|   /// this IVStrideUse. |   /// this IVStrideUse. | ||||||
|   IVUsers *getParent() const { return Parent; } |   IVUsers *getParent() const { return Parent; } | ||||||
|  |  | ||||||
|   /// getStride - Return the expression for the stride for the use. |   /// getExpr - Return the expression for the use. | ||||||
|   const SCEV *getStride() const { return Stride; } |   const SCEV *getExpr() const { return Expr; } | ||||||
|  |  | ||||||
|   /// setStride - Assign a new stride to this use. |   /// setExpr - Assign a new expression to this use. | ||||||
|   void setStride(const SCEV *Val) { |   void setExpr(const SCEV *Val) { | ||||||
|     Stride = Val; |     Expr = Val; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   /// getOffset - Return the offset to add to a theoretical induction |   const SCEV *getStride(const Loop *L) const; | ||||||
|   /// variable that starts at zero and counts up by the stride to compute |  | ||||||
|   /// the value for the use. This always has the same type as the stride. |  | ||||||
|   const SCEV *getOffset() const { return Offset; } |  | ||||||
|  |  | ||||||
|   /// setOffset - Assign a new offset to this use. |  | ||||||
|   void setOffset(const SCEV *Val) { |  | ||||||
|     Offset = Val; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   /// getOperandValToReplace - Return the Value of the operand in the user |   /// getOperandValToReplace - Return the Value of the operand in the user | ||||||
|   /// instruction that this IVStrideUse is representing. |   /// instruction that this IVStrideUse is representing. | ||||||
| @@ -83,37 +77,30 @@ public: | |||||||
|     OperandValToReplace = Op; |     OperandValToReplace = Op; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   /// isUseOfPostIncrementedValue - True if this should use the |   /// getPostIncLoops - Return the set of loops for which the expression has | ||||||
|   /// post-incremented version of this IV, not the preincremented version. |   /// been adjusted to use post-inc mode. | ||||||
|   /// This can only be set in special cases, such as the terminating setcc |   const PostIncLoopSet &getPostIncLoops() const { | ||||||
|   /// instruction for a loop or uses dominated by the loop. |     return PostIncLoops; | ||||||
|   bool isUseOfPostIncrementedValue() const { |  | ||||||
|     return IsUseOfPostIncrementedValue; |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   /// setIsUseOfPostIncrmentedValue - set the flag that indicates whether |   /// transformToPostInc - Transform the expression to post-inc form for the | ||||||
|   /// this is a post-increment use. |   /// given loop. | ||||||
|   void setIsUseOfPostIncrementedValue(bool Val) { |   void transformToPostInc(const Loop *L); | ||||||
|     IsUseOfPostIncrementedValue = Val; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| private: | private: | ||||||
|   /// Parent - a pointer to the IVUsers that owns this IVStrideUse. |   /// Parent - a pointer to the IVUsers that owns this IVStrideUse. | ||||||
|   IVUsers *Parent; |   IVUsers *Parent; | ||||||
|  |  | ||||||
|   /// Stride - The stride for this use. |   /// Expr - The expression for this use. | ||||||
|   const SCEV *Stride; |   const SCEV *Expr; | ||||||
|  |  | ||||||
|   /// Offset - The offset to add to the base induction expression. |  | ||||||
|   const SCEV *Offset; |  | ||||||
|  |  | ||||||
|   /// OperandValToReplace - The Value of the operand in the user instruction |   /// OperandValToReplace - The Value of the operand in the user instruction | ||||||
|   /// that this IVStrideUse is representing. |   /// that this IVStrideUse is representing. | ||||||
|   WeakVH OperandValToReplace; |   WeakVH OperandValToReplace; | ||||||
|  |  | ||||||
|   /// IsUseOfPostIncrementedValue - True if this should use the |   /// PostIncLoops - The set of loops for which Expr has been adjusted to | ||||||
|   /// post-incremented version of this IV, not the preincremented version. |   /// use post-inc mode. This corresponds with SCEVExpander's post-inc concept. | ||||||
|   bool IsUseOfPostIncrementedValue; |   PostIncLoopSet PostIncLoops; | ||||||
|  |  | ||||||
|   /// Deleted - Implementation of CallbackVH virtual function to |   /// Deleted - Implementation of CallbackVH virtual function to | ||||||
|   /// receive notification when the User is deleted. |   /// receive notification when the User is deleted. | ||||||
| @@ -174,18 +161,13 @@ public: | |||||||
|   /// return true.  Otherwise, return false. |   /// return true.  Otherwise, return false. | ||||||
|   bool AddUsersIfInteresting(Instruction *I); |   bool AddUsersIfInteresting(Instruction *I); | ||||||
|  |  | ||||||
|   IVStrideUse &AddUser(const SCEV *Stride, const SCEV *Offset, |   IVStrideUse &AddUser(const SCEV *Expr, | ||||||
|                        Instruction *User, Value *Operand); |                        Instruction *User, Value *Operand); | ||||||
|  |  | ||||||
|   /// getReplacementExpr - Return a SCEV expression which computes the |   /// getReplacementExpr - Return a SCEV expression which computes the | ||||||
|   /// value of the OperandValToReplace of the given IVStrideUse. |   /// value of the OperandValToReplace of the given IVStrideUse. | ||||||
|   const SCEV *getReplacementExpr(const IVStrideUse &U) const; |   const SCEV *getReplacementExpr(const IVStrideUse &U) const; | ||||||
|  |  | ||||||
|   /// getCanonicalExpr - Return a SCEV expression which computes the |  | ||||||
|   /// value of the SCEV of the given IVStrideUse, ignoring the  |  | ||||||
|   /// isUseOfPostIncrementedValue flag. |  | ||||||
|   const SCEV *getCanonicalExpr(const IVStrideUse &U) const; |  | ||||||
|  |  | ||||||
|   typedef ilist<IVStrideUse>::iterator iterator; |   typedef ilist<IVStrideUse>::iterator iterator; | ||||||
|   typedef ilist<IVStrideUse>::const_iterator const_iterator; |   typedef ilist<IVStrideUse>::const_iterator const_iterator; | ||||||
|   iterator begin() { return IVUses.begin(); } |   iterator begin() { return IVUses.begin(); } | ||||||
|   | |||||||
| @@ -15,6 +15,7 @@ | |||||||
| #define LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H | #define LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H | ||||||
|  |  | ||||||
| #include "llvm/Analysis/ScalarEvolutionExpressions.h" | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | ||||||
|  | #include "llvm/Analysis/ScalarEvolutionNormalization.h" | ||||||
| #include "llvm/Support/IRBuilder.h" | #include "llvm/Support/IRBuilder.h" | ||||||
| #include "llvm/Support/TargetFolder.h" | #include "llvm/Support/TargetFolder.h" | ||||||
| #include <set> | #include <set> | ||||||
| @@ -32,12 +33,12 @@ namespace llvm { | |||||||
|       InsertedExpressions; |       InsertedExpressions; | ||||||
|     std::set<Value*> InsertedValues; |     std::set<Value*> InsertedValues; | ||||||
|  |  | ||||||
|     /// PostIncLoop - When non-null, expanded addrecs referring to the given |     /// PostIncLoops - Addrecs referring to any of the given loops are expanded | ||||||
|     /// loop expanded in post-inc mode. For example, expanding {1,+,1}<L> in |     /// in post-inc mode. For example, expanding {1,+,1}<L> in post-inc mode | ||||||
|     /// post-inc mode returns the add instruction that adds one to the phi |     /// returns the add instruction that adds one to the phi for {0,+,1}<L>, | ||||||
|     /// for {0,+,1}<L>, as opposed to a new phi starting at 1. This is only |     /// as opposed to a new phi starting at 1. This is only supported in | ||||||
|     /// supported in non-canonical mode. |     /// non-canonical mode. | ||||||
|     const Loop *PostIncLoop; |     PostIncLoopSet PostIncLoops; | ||||||
|  |  | ||||||
|     /// IVIncInsertPos - When this is non-null, addrecs expanded in the |     /// IVIncInsertPos - When this is non-null, addrecs expanded in the | ||||||
|     /// loop it indicates should be inserted with increments at |     /// loop it indicates should be inserted with increments at | ||||||
| @@ -62,7 +63,7 @@ namespace llvm { | |||||||
|   public: |   public: | ||||||
|     /// SCEVExpander - Construct a SCEVExpander in "canonical" mode. |     /// SCEVExpander - Construct a SCEVExpander in "canonical" mode. | ||||||
|     explicit SCEVExpander(ScalarEvolution &se) |     explicit SCEVExpander(ScalarEvolution &se) | ||||||
|       : SE(se), PostIncLoop(0), IVIncInsertLoop(0), CanonicalMode(true), |       : SE(se), IVIncInsertLoop(0), CanonicalMode(true), | ||||||
|         Builder(se.getContext(), TargetFolder(se.TD)) {} |         Builder(se.getContext(), TargetFolder(se.TD)) {} | ||||||
|  |  | ||||||
|     /// clear - Erase the contents of the InsertedExpressions map so that users |     /// clear - Erase the contents of the InsertedExpressions map so that users | ||||||
| @@ -89,14 +90,18 @@ namespace llvm { | |||||||
|       IVIncInsertPos = Pos; |       IVIncInsertPos = Pos; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// setPostInc - If L is non-null, enable post-inc expansion for addrecs |     /// setPostInc - Enable post-inc expansion for addrecs referring to the | ||||||
|     /// referring to the given loop. If L is null, disable post-inc expansion |     /// given loops. Post-inc expansion is only supported in non-canonical | ||||||
|     /// completely. Post-inc expansion is only supported in non-canonical |  | ||||||
|     /// mode. |     /// mode. | ||||||
|     void setPostInc(const Loop *L) { |     void setPostInc(const PostIncLoopSet &L) { | ||||||
|       assert(!CanonicalMode && |       assert(!CanonicalMode && | ||||||
|              "Post-inc expansion is not supported in CanonicalMode"); |              "Post-inc expansion is not supported in CanonicalMode"); | ||||||
|       PostIncLoop = L; |       PostIncLoops = L; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// clearPostInc - Disable all post-inc expansion. | ||||||
|  |     void clearPostInc() { | ||||||
|  |       PostIncLoops.clear(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// disableCanonicalMode - Disable the behavior of expanding expressions in |     /// disableCanonicalMode - Disable the behavior of expanding expressions in | ||||||
|   | |||||||
							
								
								
									
										78
									
								
								include/llvm/Analysis/ScalarEvolutionNormalization.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								include/llvm/Analysis/ScalarEvolutionNormalization.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | |||||||
|  | //===- llvm/Analysis/ScalarEvolutionNormalization.h - See below -*- C++ -*-===// | ||||||
|  | // | ||||||
|  | //                     The LLVM Compiler Infrastructure | ||||||
|  | // | ||||||
|  | // This file is distributed under the University of Illinois Open Source | ||||||
|  | // License. See LICENSE.TXT for details. | ||||||
|  | // | ||||||
|  | //===----------------------------------------------------------------------===// | ||||||
|  | // | ||||||
|  | // This file defines utilities for working with "normalized" ScalarEvolution | ||||||
|  | // expressions. | ||||||
|  | // | ||||||
|  | // The following example illustrates post-increment uses and how normalized | ||||||
|  | // expressions help. | ||||||
|  | // | ||||||
|  | //   for (i=0; i!=n; ++i) { | ||||||
|  | //     ... | ||||||
|  | //   } | ||||||
|  | //   use(i); | ||||||
|  | // | ||||||
|  | // While the expression for most uses of i inside the loop is {0,+,1}<%L>, the | ||||||
|  | // expression for the use of i outside the loop is {1,+,1}<%L>, since i is | ||||||
|  | // incremented at the end of the loop body. This is inconvenient, since it | ||||||
|  | // suggests that we need two different induction variables, one that starts | ||||||
|  | // at 0 and one that starts at 1. We'd prefer to be able to think of these as | ||||||
|  | // the same induction variable, with uses inside the loop using the | ||||||
|  | // "pre-incremented" value, and uses after the loop using the | ||||||
|  | // "post-incremented" value. | ||||||
|  | // | ||||||
|  | // Expressions for post-incremented uses are represented as an expression | ||||||
|  | // paired with a set of loops for which the expression is in "post-increment" | ||||||
|  | // mode (there may be multiple loops). | ||||||
|  | // | ||||||
|  | //===----------------------------------------------------------------------===// | ||||||
|  |  | ||||||
|  | #ifndef LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H | ||||||
|  | #define LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H | ||||||
|  |  | ||||||
|  | #include "llvm/ADT/SmallPtrSet.h" | ||||||
|  |  | ||||||
|  | namespace llvm { | ||||||
|  |  | ||||||
|  | class Instruction; | ||||||
|  | class DominatorTree; | ||||||
|  | class Loop; | ||||||
|  | class ScalarEvolution; | ||||||
|  | class SCEV; | ||||||
|  | class Value; | ||||||
|  |  | ||||||
|  | /// TransformKind - Different types of transformations that | ||||||
|  | /// TransformForPostIncUse can do. | ||||||
|  | enum TransformKind { | ||||||
|  |   /// Normalize - Normalize according to the given loops. | ||||||
|  |   Normalize, | ||||||
|  |   /// NormalizeAutodetect - Detect post-inc opportunities on new expressions, | ||||||
|  |   /// update the given loop set, and normalize. | ||||||
|  |   NormalizeAutodetect, | ||||||
|  |   /// Denormalize - Perform the inverse transform on the expression with the | ||||||
|  |   /// given loop set. | ||||||
|  |   Denormalize | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | /// PostIncLoopSet - A set of loops. | ||||||
|  | typedef SmallPtrSet<const Loop *, 2> PostIncLoopSet; | ||||||
|  |  | ||||||
|  | /// TransformForPostIncUse - Transform the given expression according to the | ||||||
|  | /// given transformation kind. | ||||||
|  | const SCEV *TransformForPostIncUse(TransformKind Kind, | ||||||
|  |                                    const SCEV *S, | ||||||
|  |                                    Instruction *User, | ||||||
|  |                                    Value *OperandValToReplace, | ||||||
|  |                                    PostIncLoopSet &Loops, | ||||||
|  |                                    ScalarEvolution &SE, | ||||||
|  |                                    DominatorTree &DT); | ||||||
|  |  | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #endif | ||||||
| @@ -62,120 +62,34 @@ static void CollectSubexprs(const SCEV *S, | |||||||
|   Ops.push_back(S); |   Ops.push_back(S); | ||||||
| } | } | ||||||
|  |  | ||||||
| /// getSCEVStartAndStride - Compute the start and stride of this expression, | /// isInteresting - Test whether the given expression is "interesting" when | ||||||
| /// returning false if the expression is not a start/stride pair, or true if it | /// used by the given instruction, within the context of analyzing the | ||||||
| /// is.  The stride must be a loop invariant expression, but the start may be | /// given loop. | ||||||
| /// a mix of loop invariant and loop variant expressions.  The start cannot, | static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L) { | ||||||
| /// however, contain an AddRec from a different loop, unless that loop is an |   // Anything loop-invariant is interesting. | ||||||
| /// outer loop of the current loop. |   if (!isa<SCEVUnknown>(S) && S->isLoopInvariant(L)) | ||||||
| static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop, |  | ||||||
|                                   const SCEV *&Start, const SCEV *&Stride, |  | ||||||
|                                   ScalarEvolution *SE, DominatorTree *DT) { |  | ||||||
|   const SCEV *TheAddRec = Start;   // Initialize to zero. |  | ||||||
|  |  | ||||||
|   // If the outer level is an AddExpr, the operands are all start values except |  | ||||||
|   // for a nested AddRecExpr. |  | ||||||
|   if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) { |  | ||||||
|     for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i) |  | ||||||
|       if (const SCEVAddRecExpr *AddRec = |  | ||||||
|              dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) |  | ||||||
|         TheAddRec = SE->getAddExpr(AddRec, TheAddRec); |  | ||||||
|       else |  | ||||||
|         Start = SE->getAddExpr(Start, AE->getOperand(i)); |  | ||||||
|   } else if (isa<SCEVAddRecExpr>(SH)) { |  | ||||||
|     TheAddRec = SH; |  | ||||||
|   } else { |  | ||||||
|     return false;  // not analyzable. |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Break down TheAddRec into its component parts. |  | ||||||
|   SmallVector<const SCEV *, 4> Subexprs; |  | ||||||
|   CollectSubexprs(TheAddRec, Subexprs, *SE); |  | ||||||
|  |  | ||||||
|   // Look for an addrec on the current loop among the parts. |  | ||||||
|   const SCEV *AddRecStride = 0; |  | ||||||
|   for (SmallVectorImpl<const SCEV *>::iterator I = Subexprs.begin(), |  | ||||||
|        E = Subexprs.end(); I != E; ++I) { |  | ||||||
|     const SCEV *S = *I; |  | ||||||
|     if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) |  | ||||||
|       if (AR->getLoop() == L) { |  | ||||||
|         *I = AR->getStart(); |  | ||||||
|         AddRecStride = AR->getStepRecurrence(*SE); |  | ||||||
|         break; |  | ||||||
|       } |  | ||||||
|   } |  | ||||||
|   if (!AddRecStride) |  | ||||||
|     return false; |  | ||||||
|  |  | ||||||
|   // Add up everything else into a start value (which may not be |  | ||||||
|   // loop-invariant). |  | ||||||
|   const SCEV *AddRecStart = SE->getAddExpr(Subexprs); |  | ||||||
|  |  | ||||||
|   // Use getSCEVAtScope to attempt to simplify other loops out of |  | ||||||
|   // the picture. |  | ||||||
|   AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); |  | ||||||
|  |  | ||||||
|   Start = SE->getAddExpr(Start, AddRecStart); |  | ||||||
|  |  | ||||||
|   // If stride is an instruction, make sure it properly dominates the header. |  | ||||||
|   // Otherwise we could end up with a use before def situation. |  | ||||||
|   if (!isa<SCEVConstant>(AddRecStride)) { |  | ||||||
|     BasicBlock *Header = L->getHeader(); |  | ||||||
|     if (!AddRecStride->properlyDominates(Header, DT)) |  | ||||||
|       return false; |  | ||||||
|  |  | ||||||
|     DEBUG(dbgs() << "["; |  | ||||||
|           WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); |  | ||||||
|           dbgs() << "] Variable stride: " << *AddRecStride << "\n"); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   Stride = AddRecStride; |  | ||||||
|   return true; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression |  | ||||||
| /// and now we need to decide whether the user should use the preinc or post-inc |  | ||||||
| /// value.  If this user should use the post-inc version of the IV, return true. |  | ||||||
| /// |  | ||||||
| /// Choosing wrong here can break dominance properties (if we choose to use the |  | ||||||
| /// post-inc value when we cannot) or it can end up adding extra live-ranges to |  | ||||||
| /// the loop, resulting in reg-reg copies (if we use the pre-inc value when we |  | ||||||
| /// should use the post-inc value). |  | ||||||
| static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, |  | ||||||
|                                        const Loop *L, DominatorTree *DT) { |  | ||||||
|   // If the user is in the loop, use the preinc value. |  | ||||||
|   if (L->contains(User)) return false; |  | ||||||
|  |  | ||||||
|   BasicBlock *LatchBlock = L->getLoopLatch(); |  | ||||||
|   if (!LatchBlock) |  | ||||||
|     return false; |  | ||||||
|  |  | ||||||
|   // Ok, the user is outside of the loop.  If it is dominated by the latch |  | ||||||
|   // block, use the post-inc value. |  | ||||||
|   if (DT->dominates(LatchBlock, User->getParent())) |  | ||||||
|     return true; |     return true; | ||||||
|  |  | ||||||
|   // There is one case we have to be careful of: PHI nodes.  These little guys |   // An addrec is interesting if it's affine or if it has an interesting start. | ||||||
|   // can live in blocks that are not dominated by the latch block, but (since |   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { | ||||||
|   // their uses occur in the predecessor block, not the block the PHI lives in) |     // Keep things simple. Don't touch loop-variant strides. | ||||||
|   // should still use the post-inc value.  Check for this case now. |     if (AR->getLoop() == L && (AR->isAffine() || !L->contains(I))) | ||||||
|   PHINode *PN = dyn_cast<PHINode>(User); |         return true; | ||||||
|   if (!PN) return false;  // not a phi, not dominated by latch block. |     // Otherwise recurse to see if the start value is interesting. | ||||||
|  |     return isInteresting(AR->getStart(), I, L); | ||||||
|  |   } | ||||||
|  |  | ||||||
|   // Look at all of the uses of IV by the PHI node.  If any use corresponds to |   // An add is interesting if any of its operands is. | ||||||
|   // a block that is not dominated by the latch block, give up and use the |   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { | ||||||
|   // preincremented value. |     for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end(); | ||||||
|   unsigned NumUses = 0; |          OI != OE; ++OI) | ||||||
|   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) |       if (isInteresting(*OI, I, L)) | ||||||
|     if (PN->getIncomingValue(i) == IV) { |         return true; | ||||||
|       ++NumUses; |     return false; | ||||||
|       if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) |   } | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   // Okay, all uses of IV by PN are in predecessor blocks that really are |   // Nothing else is interesting here. | ||||||
|   // dominated by the latch block.  Use the post-incremented value. |   return false; | ||||||
|   return true; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| /// AddUsersIfInteresting - Inspect the specified instruction.  If it is a | /// AddUsersIfInteresting - Inspect the specified instruction.  If it is a | ||||||
| @@ -196,16 +110,9 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { | |||||||
|   const SCEV *ISE = SE->getSCEV(I); |   const SCEV *ISE = SE->getSCEV(I); | ||||||
|   if (isa<SCEVCouldNotCompute>(ISE)) return false; |   if (isa<SCEVCouldNotCompute>(ISE)) return false; | ||||||
|  |  | ||||||
|   // Get the start and stride for this expression. |   // If we've come to an uninteresting expression, stop the traversal and | ||||||
|   Loop *UseLoop = LI->getLoopFor(I->getParent()); |   // call this a user. | ||||||
|   const SCEV *Start = SE->getIntegerSCEV(0, ISE->getType()); |   if (!isInteresting(ISE, I, L)) | ||||||
|   const SCEV *Stride = Start; |  | ||||||
|  |  | ||||||
|   if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT)) |  | ||||||
|     return false;  // Non-reducible symbolic expression, bail out. |  | ||||||
|  |  | ||||||
|   // Keep things simple. Don't touch loop-variant strides. |  | ||||||
|   if (!Stride->isLoopInvariant(L) && L->contains(I)) |  | ||||||
|     return false; |     return false; | ||||||
|  |  | ||||||
|   SmallPtrSet<Instruction *, 4> UniqueUsers; |   SmallPtrSet<Instruction *, 4> UniqueUsers; | ||||||
| @@ -241,27 +148,24 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (AddUserToIVUsers) { |     if (AddUserToIVUsers) { | ||||||
|       // Okay, we found a user that we cannot reduce.  Analyze the instruction |       // Okay, we found a user that we cannot reduce. | ||||||
|       // and decide what to do with it.  If we are a use inside of the loop, use |       IVUses.push_back(new IVStrideUse(this, ISE, User, I)); | ||||||
|       // the value before incrementation, otherwise use it after incrementation. |       IVStrideUse &NewUse = IVUses.back(); | ||||||
|       if (IVUseShouldUsePostIncValue(User, I, L, DT)) { |       // Transform the expression into a normalized form. | ||||||
|         // The value used will be incremented by the stride more than we are |       NewUse.Expr = | ||||||
|         // expecting, so subtract this off. |         TransformForPostIncUse(NormalizeAutodetect, NewUse.Expr, | ||||||
|         const SCEV *NewStart = SE->getMinusSCEV(Start, Stride); |                                User, I, | ||||||
|         IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I)); |                                NewUse.PostIncLoops, | ||||||
|         IVUses.back().setIsUseOfPostIncrementedValue(true); |                                *SE, *DT); | ||||||
|         DEBUG(dbgs() << "   USING POSTINC SCEV, START=" << *NewStart<< "\n"); |       DEBUG(dbgs() << "   NORMALIZED TO: " << *NewUse.Expr << '\n'); | ||||||
|       } else { |  | ||||||
|         IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I)); |  | ||||||
|       } |  | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   return true; |   return true; | ||||||
| } | } | ||||||
|  |  | ||||||
| IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset, | IVStrideUse &IVUsers::AddUser(const SCEV *Expr, | ||||||
|                               Instruction *User, Value *Operand) { |                               Instruction *User, Value *Operand) { | ||||||
|   IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand)); |   IVUses.push_back(new IVStrideUse(this, Expr, User, Operand)); | ||||||
|   return IVUses.back(); |   return IVUses.back(); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -295,30 +199,10 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { | |||||||
| /// getReplacementExpr - Return a SCEV expression which computes the | /// getReplacementExpr - Return a SCEV expression which computes the | ||||||
| /// value of the OperandValToReplace of the given IVStrideUse. | /// value of the OperandValToReplace of the given IVStrideUse. | ||||||
| const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const { | const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const { | ||||||
|   // Start with zero. |   PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(U.PostIncLoops); | ||||||
|   const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); |   return TransformForPostIncUse(Denormalize, U.getExpr(), | ||||||
|   // Create the basic add recurrence. |                                 U.getUser(), U.getOperandValToReplace(), | ||||||
|   RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); |                                 Loops, *SE, *DT); | ||||||
|   // Add the offset in a separate step, because it may be loop-variant. |  | ||||||
|   RetVal = SE->getAddExpr(RetVal, U.getOffset()); |  | ||||||
|   // For uses of post-incremented values, add an extra stride to compute |  | ||||||
|   // the actual replacement value. |  | ||||||
|   if (U.isUseOfPostIncrementedValue()) |  | ||||||
|     RetVal = SE->getAddExpr(RetVal, U.getStride()); |  | ||||||
|   return RetVal; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// getCanonicalExpr - Return a SCEV expression which computes the |  | ||||||
| /// value of the SCEV of the given IVStrideUse, ignoring the  |  | ||||||
| /// isUseOfPostIncrementedValue flag. |  | ||||||
| const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const { |  | ||||||
|   // Start with zero. |  | ||||||
|   const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); |  | ||||||
|   // Create the basic add recurrence. |  | ||||||
|   RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); |  | ||||||
|   // Add the offset in a separate step, because it may be loop-variant. |  | ||||||
|   RetVal = SE->getAddExpr(RetVal, U.getOffset()); |  | ||||||
|   return RetVal; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| void IVUsers::print(raw_ostream &OS, const Module *M) const { | void IVUsers::print(raw_ostream &OS, const Module *M) const { | ||||||
| @@ -339,8 +223,13 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { | |||||||
|     WriteAsOperand(OS, UI->getOperandValToReplace(), false); |     WriteAsOperand(OS, UI->getOperandValToReplace(), false); | ||||||
|     OS << " = " |     OS << " = " | ||||||
|        << *getReplacementExpr(*UI); |        << *getReplacementExpr(*UI); | ||||||
|     if (UI->isUseOfPostIncrementedValue()) |     for (PostIncLoopSet::const_iterator | ||||||
|       OS << " (post-inc)"; |          I = UI->PostIncLoops.begin(), | ||||||
|  |          E = UI->PostIncLoops.end(); I != E; ++I) { | ||||||
|  |       OS << " (post-inc with loop "; | ||||||
|  |       WriteAsOperand(OS, (*I)->getHeader(), false); | ||||||
|  |       OS << ")"; | ||||||
|  |     } | ||||||
|     OS << " in  "; |     OS << " in  "; | ||||||
|     UI->getUser()->print(OS, &Annotator); |     UI->getUser()->print(OS, &Annotator); | ||||||
|     OS << '\n'; |     OS << '\n'; | ||||||
| @@ -356,6 +245,39 @@ void IVUsers::releaseMemory() { | |||||||
|   IVUses.clear(); |   IVUses.clear(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// findAddRecForLoop - Search S for an add recurrence belonging to loop L, | ||||||
|  | /// looking through the start operands of addrecs for other loops and | ||||||
|  | /// through the operands of add expressions. Returns null if none is found. | ||||||
|  | static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { | ||||||
|  |   // Peel nested addrecs, descending through each start operand until an | ||||||
|  |   // addrec for L turns up or the chain of addrecs ends. | ||||||
|  |   while (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(S)) { | ||||||
|  |     if (Rec->getLoop() == L) | ||||||
|  |       return Rec; | ||||||
|  |     S = Rec->getStart(); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Only sums are searched any further; other expression kinds are opaque. | ||||||
|  |   const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(S); | ||||||
|  |   if (!Sum) | ||||||
|  |     return 0; | ||||||
|  |  | ||||||
|  |   // The first operand containing a match wins. | ||||||
|  |   for (SCEVAddExpr::op_iterator OI = Sum->op_begin(), OE = Sum->op_end(); | ||||||
|  |        OI != OE; ++OI) | ||||||
|  |     if (const SCEVAddRecExpr *Found = findAddRecForLoop(*OI, L)) | ||||||
|  |       return Found; | ||||||
|  |   return 0; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// getStride - Return the step of the add recurrence for loop L found | ||||||
|  | /// within this use's expression, or null if the expression has none. | ||||||
|  | const SCEV *IVStrideUse::getStride(const Loop *L) const { | ||||||
|  |   const SCEVAddRecExpr *Rec = findAddRecForLoop(getExpr(), L); | ||||||
|  |   if (!Rec) | ||||||
|  |     return 0; | ||||||
|  |   return Rec->getStepRecurrence(*Parent->SE); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// transformToPostInc - Normalize this use's expression with respect to | ||||||
|  | /// loop L and record L as one of its post-increment loops. | ||||||
|  | void IVStrideUse::transformToPostInc(const Loop *L) { | ||||||
|  |   // Normalize against L alone, not against the loops already recorded in | ||||||
|  |   // PostIncLoops. | ||||||
|  |   PostIncLoopSet JustL; | ||||||
|  |   JustL.insert(L); | ||||||
|  |   const SCEV *Normalized = | ||||||
|  |     TransformForPostIncUse(Normalize, Expr, | ||||||
|  |                            getUser(), getOperandValToReplace(), | ||||||
|  |                            JustL, *Parent->SE, *Parent->DT); | ||||||
|  |   Expr = Normalized; | ||||||
|  |   PostIncLoops.insert(L); | ||||||
|  | } | ||||||
|  |  | ||||||
| void IVStrideUse::deleted() { | void IVStrideUse::deleted() { | ||||||
|   // Remove this user from the list. |   // Remove this user from the list. | ||||||
|   Parent->IVUses.erase(this); |   Parent->IVUses.erase(this); | ||||||
|   | |||||||
| @@ -966,9 +966,12 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { | |||||||
|   // Determine a normalized form of this expression, which is the expression |   // Determine a normalized form of this expression, which is the expression | ||||||
|   // before any post-inc adjustment is made. |   // before any post-inc adjustment is made. | ||||||
|   const SCEVAddRecExpr *Normalized = S; |   const SCEVAddRecExpr *Normalized = S; | ||||||
|   if (L == PostIncLoop) { |   if (PostIncLoops.count(L)) { | ||||||
|     const SCEV *Step = S->getStepRecurrence(SE); |     PostIncLoopSet Loops; | ||||||
|     Normalized = cast<SCEVAddRecExpr>(SE.getMinusSCEV(S, Step)); |     Loops.insert(L); | ||||||
|  |     Normalized = | ||||||
|  |       cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0, | ||||||
|  |                                                   Loops, SE, *SE.DT)); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // Strip off any non-loop-dominating component from the addrec start. |   // Strip off any non-loop-dominating component from the addrec start. | ||||||
| @@ -1002,7 +1005,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { | |||||||
|  |  | ||||||
|   // Accommodate post-inc mode, if necessary. |   // Accommodate post-inc mode, if necessary. | ||||||
|   Value *Result; |   Value *Result; | ||||||
|   if (L != PostIncLoop) |   if (!PostIncLoops.count(L)) | ||||||
|     Result = PN; |     Result = PN; | ||||||
|   else { |   else { | ||||||
|     // In PostInc mode, use the post-incremented value. |     // In PostInc mode, use the post-incremented value. | ||||||
| @@ -1274,7 +1277,7 @@ Value *SCEVExpander::expand(const SCEV *S) { | |||||||
|       // If the SCEV is computable at this level, insert it into the header |       // If the SCEV is computable at this level, insert it into the header | ||||||
|       // after the PHIs (and after any other instructions that we've inserted |       // after the PHIs (and after any other instructions that we've inserted | ||||||
|       // there) so that it is guaranteed to dominate any user inside the loop. |       // there) so that it is guaranteed to dominate any user inside the loop. | ||||||
|       if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop) |       if (L && S->hasComputableLoopEvolution(L) && !PostIncLoops.count(L)) | ||||||
|         InsertPt = L->getHeader()->getFirstNonPHI(); |         InsertPt = L->getHeader()->getFirstNonPHI(); | ||||||
|       while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) |       while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) | ||||||
|         InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); |         InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); | ||||||
| @@ -1296,7 +1299,7 @@ Value *SCEVExpander::expand(const SCEV *S) { | |||||||
|   Value *V = visit(S); |   Value *V = visit(S); | ||||||
|  |  | ||||||
|   // Remember the expanded value for this SCEV at this location. |   // Remember the expanded value for this SCEV at this location. | ||||||
|   if (!PostIncLoop) |   if (PostIncLoops.empty()) | ||||||
|     InsertedExpressions[std::make_pair(S, InsertPt)] = V; |     InsertedExpressions[std::make_pair(S, InsertPt)] = V; | ||||||
|  |  | ||||||
|   restoreInsertPoint(SaveInsertBB, SaveInsertPt); |   restoreInsertPoint(SaveInsertBB, SaveInsertPt); | ||||||
| @@ -1304,7 +1307,7 @@ Value *SCEVExpander::expand(const SCEV *S) { | |||||||
| } | } | ||||||
|  |  | ||||||
| void SCEVExpander::rememberInstruction(Value *I) { | void SCEVExpander::rememberInstruction(Value *I) { | ||||||
|   if (!PostIncLoop) |   if (PostIncLoops.empty()) | ||||||
|     InsertedValues.insert(I); |     InsertedValues.insert(I); | ||||||
|  |  | ||||||
|   // If we just claimed an existing instruction and that instruction had |   // If we just claimed an existing instruction and that instruction had | ||||||
|   | |||||||
							
								
								
									
										150
									
								
								lib/Analysis/ScalarEvolutionNormalization.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										150
									
								
								lib/Analysis/ScalarEvolutionNormalization.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,150 @@ | |||||||
|  | //===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===// | ||||||
|  | // | ||||||
|  | //                     The LLVM Compiler Infrastructure | ||||||
|  | // | ||||||
|  | // This file is distributed under the University of Illinois Open Source | ||||||
|  | // License. See LICENSE.TXT for details. | ||||||
|  | // | ||||||
|  | //===----------------------------------------------------------------------===// | ||||||
|  | // | ||||||
|  | // This file implements utilities for working with "normalized" expressions. | ||||||
|  | // See the comments at the top of ScalarEvolutionNormalization.h for details. | ||||||
|  | // | ||||||
|  | //===----------------------------------------------------------------------===// | ||||||
|  |  | ||||||
|  | #include "llvm/Analysis/Dominators.h" | ||||||
|  | #include "llvm/Analysis/LoopInfo.h" | ||||||
|  | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | ||||||
|  | #include "llvm/Analysis/ScalarEvolutionNormalization.h" | ||||||
|  | using namespace llvm; | ||||||
|  |  | ||||||
|  | /// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression | ||||||
|  | /// and now we need to decide whether the user should use the preinc or post-inc | ||||||
|  | /// value.  If this user should use the post-inc version of the IV, return true. | ||||||
|  | /// | ||||||
|  | /// Choosing wrong here can break dominance properties (if we choose to use the | ||||||
|  | /// post-inc value when we cannot) or it can end up adding extra live-ranges to | ||||||
|  | /// the loop, resulting in reg-reg copies (if we use the pre-inc value when we | ||||||
|  | /// should use the post-inc value). | ||||||
|  | /// | ||||||
|  | /// User is the instruction using the IV, IV is the operand value being used, | ||||||
|  | /// L is the loop of the induction variable, and DT is used for the dominance | ||||||
|  | /// queries against L's latch block. | ||||||
|  | static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, | ||||||
|  |                                        const Loop *L, DominatorTree *DT) { | ||||||
|  |   // If the user is in the loop, use the preinc value. | ||||||
|  |   if (L->contains(User)) return false; | ||||||
|  |  | ||||||
|  |   // Without a latch block there is nothing to test dominance against, so | ||||||
|  |   // stay with the preinc value. | ||||||
|  |   BasicBlock *LatchBlock = L->getLoopLatch(); | ||||||
|  |   if (!LatchBlock) | ||||||
|  |     return false; | ||||||
|  |  | ||||||
|  |   // Ok, the user is outside of the loop.  If it is dominated by the latch | ||||||
|  |   // block, use the post-inc value. | ||||||
|  |   if (DT->dominates(LatchBlock, User->getParent())) | ||||||
|  |     return true; | ||||||
|  |  | ||||||
|  |   // There is one case we have to be careful of: PHI nodes.  These little guys | ||||||
|  |   // can live in blocks that are not dominated by the latch block, but (since | ||||||
|  |   // their uses occur in the predecessor block, not the block the PHI lives in) | ||||||
|  |   // should still use the post-inc value.  Check for this case now. | ||||||
|  |   PHINode *PN = dyn_cast<PHINode>(User); | ||||||
|  |   if (!PN) return false;  // not a phi, not dominated by latch block. | ||||||
|  |  | ||||||
|  |   // Look at all of the uses of IV by the PHI node.  If any use corresponds to | ||||||
|  |   // a block that is not dominated by the latch block, give up and use the | ||||||
|  |   // preincremented value. | ||||||
|  |   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) | ||||||
|  |     if (PN->getIncomingValue(i) == IV && | ||||||
|  |         !DT->dominates(LatchBlock, PN->getIncomingBlock(i))) | ||||||
|  |       return false; | ||||||
|  |  | ||||||
|  |   // Okay, all uses of IV by PN are in predecessor blocks that really are | ||||||
|  |   // dominated by the latch block.  Use the post-incremented value. | ||||||
|  |   return true; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// TransformForPostIncUse - Recursively rebuild S, adjusting every add | ||||||
|  | /// recurrence in it according to Kind: | ||||||
|  | ///   NormalizeAutodetect - subtract the step from each addrec whose loop | ||||||
|  | ///     should be used post-incremented by User, and add that loop to Loops. | ||||||
|  | ///   Normalize - subtract the step from each addrec whose loop is in Loops. | ||||||
|  | ///   Denormalize - add the (transformed) step to each addrec whose loop is | ||||||
|  | ///     in Loops. | ||||||
|  | /// Returns S itself when a non-addrec expression has no changed operands. | ||||||
|  | const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, | ||||||
|  |                                          const SCEV *S, | ||||||
|  |                                          Instruction *User, | ||||||
|  |                                          Value *OperandValToReplace, | ||||||
|  |                                          PostIncLoopSet &Loops, | ||||||
|  |                                          ScalarEvolution &SE, | ||||||
|  |                                          DominatorTree &DT) { | ||||||
|  |   // Constants and unknowns are leaves; there is nothing inside to adjust. | ||||||
|  |   if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S)) | ||||||
|  |     return S; | ||||||
|  |  | ||||||
|  |   // Casts: transform the operand, then rebuild the same kind of cast. | ||||||
|  |   if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) { | ||||||
|  |     const SCEV *OldOp = Cast->getOperand(); | ||||||
|  |     const SCEV *NewOp = | ||||||
|  |       TransformForPostIncUse(Kind, OldOp, User, OperandValToReplace, | ||||||
|  |                              Loops, SE, DT); | ||||||
|  |     if (NewOp != OldOp) | ||||||
|  |       switch (S->getSCEVType()) { | ||||||
|  |       case scZeroExtend: return SE.getZeroExtendExpr(NewOp, S->getType()); | ||||||
|  |       case scSignExtend: return SE.getSignExtendExpr(NewOp, S->getType()); | ||||||
|  |       case scTruncate: return SE.getTruncateExpr(NewOp, S->getType()); | ||||||
|  |       default: llvm_unreachable("Unexpected SCEVCastExpr kind!"); | ||||||
|  |       } | ||||||
|  |     return S; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // N-ary expressions, including addrecs: transform every operand first. | ||||||
|  |   if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) { | ||||||
|  |     SmallVector<const SCEV *, 8> NewOps; | ||||||
|  |     bool AnyChanged = false; | ||||||
|  |     for (SCEVNAryExpr::op_iterator OI = NAry->op_begin(), | ||||||
|  |          OE = NAry->op_end(); OI != OE; ++OI) { | ||||||
|  |       const SCEV *OldOp = *OI; | ||||||
|  |       const SCEV *NewOp = | ||||||
|  |         TransformForPostIncUse(Kind, OldOp, User, OperandValToReplace, | ||||||
|  |                                Loops, SE, DT); | ||||||
|  |       if (NewOp != OldOp) | ||||||
|  |         AnyChanged = true; | ||||||
|  |       NewOps.push_back(NewOp); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { | ||||||
|  |       // Addrecs are where the post-increment adjustment itself happens. The | ||||||
|  |       // addrec is always rebuilt from the transformed operands, whether or | ||||||
|  |       // not any of them changed. | ||||||
|  |       const Loop *L = AR->getLoop(); | ||||||
|  |       const SCEV *Result = SE.getAddRecExpr(NewOps, L); | ||||||
|  |       if (Kind == NormalizeAutodetect) { | ||||||
|  |         // Decide from the user and the replaced operand whether this loop | ||||||
|  |         // is a post-inc loop; if so adjust and remember it in Loops. | ||||||
|  |         Instruction *OI = dyn_cast<Instruction>(OperandValToReplace); | ||||||
|  |         if (OI && IVUseShouldUsePostIncValue(User, OI, L, &DT)) { | ||||||
|  |           Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); | ||||||
|  |           Loops.insert(L); | ||||||
|  |         } | ||||||
|  |       } else if (Kind == Normalize) { | ||||||
|  |         if (Loops.count(L)) | ||||||
|  |           Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); | ||||||
|  |       } else if (Kind == Denormalize) { | ||||||
|  |         if (Loops.count(L)) { | ||||||
|  |           // The step is itself transformed before being added back. | ||||||
|  |           const SCEV *TransformedStep = | ||||||
|  |             TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), | ||||||
|  |                                    User, OperandValToReplace, Loops, SE, DT); | ||||||
|  |           Result = SE.getAddExpr(Result, TransformedStep); | ||||||
|  |         } | ||||||
|  |       } else { | ||||||
|  |         llvm_unreachable("Unexpected transform name!"); | ||||||
|  |       } | ||||||
|  |       return Result; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Other n-ary kinds are rebuilt only when an operand actually changed. | ||||||
|  |     if (AnyChanged) | ||||||
|  |       switch (S->getSCEVType()) { | ||||||
|  |       case scAddExpr: return SE.getAddExpr(NewOps); | ||||||
|  |       case scMulExpr: return SE.getMulExpr(NewOps); | ||||||
|  |       case scSMaxExpr: return SE.getSMaxExpr(NewOps); | ||||||
|  |       case scUMaxExpr: return SE.getUMaxExpr(NewOps); | ||||||
|  |       default: llvm_unreachable("Unexpected SCEVNAryExpr kind!"); | ||||||
|  |       } | ||||||
|  |     return S; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Unsigned division: transform both sides, left first. | ||||||
|  |   if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(S)) { | ||||||
|  |     const SCEV *OldLHS = Div->getLHS(); | ||||||
|  |     const SCEV *OldRHS = Div->getRHS(); | ||||||
|  |     const SCEV *NewLHS = | ||||||
|  |       TransformForPostIncUse(Kind, OldLHS, User, OperandValToReplace, | ||||||
|  |                              Loops, SE, DT); | ||||||
|  |     const SCEV *NewRHS = | ||||||
|  |       TransformForPostIncUse(Kind, OldRHS, User, OperandValToReplace, | ||||||
|  |                              Loops, SE, DT); | ||||||
|  |     if (NewLHS == OldLHS && NewRHS == OldRHS) | ||||||
|  |       return S; | ||||||
|  |     return SE.getUDivExpr(NewLHS, NewRHS); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   llvm_unreachable("Unexpected SCEV kind!"); | ||||||
|  |   return 0; | ||||||
|  | } | ||||||
| @@ -454,6 +454,46 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { | |||||||
|   return Changed; |   return Changed; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // FIXME: It is an extremely bad idea to indvar substitute anything more | ||||||
|  | // complex than affine induction variables.  Doing so will put expensive | ||||||
|  | // polynomial evaluations inside of the loop, and the str reduction pass | ||||||
|  | // currently can only reduce affine polynomials.  For now just disable | ||||||
|  | // indvar subst on anything more complex than an affine addrec, unless | ||||||
|  | // it can be expanded to a trivial value. | ||||||
|  | static bool isSafe(const SCEV *S, const Loop *L) { | ||||||
|  |   // Anything that does not vary in L is safe. This test must come first: | ||||||
|  |   // it also accepts expressions the checks below would reject. | ||||||
|  |   if (S->isLoopInvariant(L)) | ||||||
|  |     return true; | ||||||
|  |  | ||||||
|  |   // An addrec is safe only when it is affine; LSR doesn't know how to | ||||||
|  |   // transform the non-affine ones into efficient code. | ||||||
|  |   if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(S)) | ||||||
|  |     return Rec->isAffine(); | ||||||
|  |  | ||||||
|  |   // A commutative expression is safe if all of its operands are safe. | ||||||
|  |   if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(S)) { | ||||||
|  |     for (SCEVCommutativeExpr::op_iterator OI = Comm->op_begin(), | ||||||
|  |          OE = Comm->op_end(); OI != OE; ++OI) { | ||||||
|  |       if (isSafe(*OI, L)) | ||||||
|  |         continue; | ||||||
|  |       return false; | ||||||
|  |     } | ||||||
|  |     return true; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // A cast is exactly as safe as the value being cast. | ||||||
|  |   if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) | ||||||
|  |     return isSafe(Cast->getOperand(), L); | ||||||
|  |  | ||||||
|  |   // A udiv needs both its dividend and its divisor to be safe. | ||||||
|  |   if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(S)) { | ||||||
|  |     if (!isSafe(Div->getLHS(), L)) | ||||||
|  |       return false; | ||||||
|  |     return isSafe(Div->getRHS(), L); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // SCEVUnknown is always safe; nothing else is. | ||||||
|  |   return isa<SCEVUnknown>(S); | ||||||
|  | } | ||||||
|  |  | ||||||
| void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { | void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { | ||||||
|   SmallVector<WeakVH, 16> DeadInsts; |   SmallVector<WeakVH, 16> DeadInsts; | ||||||
|  |  | ||||||
| @@ -465,7 +505,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { | |||||||
|   // the need for the code evaluation methods to insert induction variables |   // the need for the code evaluation methods to insert induction variables | ||||||
|   // of different sizes. |   // of different sizes. | ||||||
|   for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { |   for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { | ||||||
|     const SCEV *Stride = UI->getStride(); |  | ||||||
|     Value *Op = UI->getOperandValToReplace(); |     Value *Op = UI->getOperandValToReplace(); | ||||||
|     const Type *UseTy = Op->getType(); |     const Type *UseTy = Op->getType(); | ||||||
|     Instruction *User = UI->getUser(); |     Instruction *User = UI->getUser(); | ||||||
| @@ -486,7 +525,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { | |||||||
|     // currently can only reduce affine polynomials.  For now just disable |     // currently can only reduce affine polynomials.  For now just disable | ||||||
|     // indvar subst on anything more complex than an affine addrec, unless |     // indvar subst on anything more complex than an affine addrec, unless | ||||||
|     // it can be expanded to a trivial value. |     // it can be expanded to a trivial value. | ||||||
|     if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) |     if (!isSafe(AR, L)) | ||||||
|       continue; |       continue; | ||||||
|  |  | ||||||
|     // Determine the insertion point for this user. By default, insert |     // Determine the insertion point for this user. By default, insert | ||||||
|   | |||||||
| @@ -781,10 +781,10 @@ struct LSRFixup { | |||||||
|   /// will be replaced. |   /// will be replaced. | ||||||
|   Value *OperandValToReplace; |   Value *OperandValToReplace; | ||||||
|  |  | ||||||
|   /// PostIncLoop - If this user is to use the post-incremented value of an |   /// PostIncLoops - If this user is to use the post-incremented value of an | ||||||
|   /// induction variable, this variable is non-null and holds the loop |   /// induction variable, this set is non-empty and holds the loops | ||||||
|   /// associated with the induction variable. |   /// associated with the induction variable. | ||||||
|   const Loop *PostIncLoop; |   PostIncLoopSet PostIncLoops; | ||||||
|  |  | ||||||
|   /// LUIdx - The index of the LSRUse describing the expression which |   /// LUIdx - The index of the LSRUse describing the expression which | ||||||
|   /// this fixup needs, minus an offset (below). |   /// this fixup needs, minus an offset (below). | ||||||
| @@ -795,6 +795,8 @@ struct LSRFixup { | |||||||
|   /// offsets, for example in an unrolled loop. |   /// offsets, for example in an unrolled loop. | ||||||
|   int64_t Offset; |   int64_t Offset; | ||||||
|  |  | ||||||
|  |   bool isUseFullyOutsideLoop(const Loop *L) const; | ||||||
|  |  | ||||||
|   LSRFixup(); |   LSRFixup(); | ||||||
|  |  | ||||||
|   void print(raw_ostream &OS) const; |   void print(raw_ostream &OS) const; | ||||||
| @@ -804,9 +806,24 @@ struct LSRFixup { | |||||||
| } | } | ||||||
|  |  | ||||||
| LSRFixup::LSRFixup() | LSRFixup::LSRFixup() | ||||||
|   : UserInst(0), OperandValToReplace(0), PostIncLoop(0), |   : UserInst(0), OperandValToReplace(0), | ||||||
|     LUIdx(~size_t(0)), Offset(0) {} |     LUIdx(~size_t(0)), Offset(0) {} | ||||||
|  |  | ||||||
|  | /// isUseFullyOutsideLoop - Return true if every place where this fixup's | ||||||
|  | /// user consumes OperandValToReplace lies outside of the loop L. | ||||||
|  | bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const { | ||||||
|  |   // An ordinary instruction uses its operands where it sits. | ||||||
|  |   const PHINode *Phi = dyn_cast<PHINode>(UserInst); | ||||||
|  |   if (!Phi) | ||||||
|  |     return !L->contains(UserInst); | ||||||
|  |  | ||||||
|  |   // A PHI uses each incoming value in the matching incoming block, so | ||||||
|  |   // check those blocks for every slot that carries the operand. | ||||||
|  |   for (unsigned Idx = 0, NumIncoming = Phi->getNumIncomingValues(); | ||||||
|  |        Idx != NumIncoming; ++Idx) { | ||||||
|  |     if (Phi->getIncomingValue(Idx) != OperandValToReplace) | ||||||
|  |       continue; | ||||||
|  |     if (L->contains(Phi->getIncomingBlock(Idx))) | ||||||
|  |       return false; | ||||||
|  |   } | ||||||
|  |   return true; | ||||||
|  | } | ||||||
|  |  | ||||||
| void LSRFixup::print(raw_ostream &OS) const { | void LSRFixup::print(raw_ostream &OS) const { | ||||||
|   OS << "UserInst="; |   OS << "UserInst="; | ||||||
|   // Store is common and interesting enough to be worth special-casing. |   // Store is common and interesting enough to be worth special-casing. | ||||||
| @@ -821,9 +838,10 @@ void LSRFixup::print(raw_ostream &OS) const { | |||||||
|   OS << ", OperandValToReplace="; |   OS << ", OperandValToReplace="; | ||||||
|   WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false); |   WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false); | ||||||
|  |  | ||||||
|   if (PostIncLoop) { |   for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(), | ||||||
|  |        E = PostIncLoops.end(); I != E; ++I) { | ||||||
|     OS << ", PostIncLoop="; |     OS << ", PostIncLoop="; | ||||||
|     WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false); |     WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if (LUIdx != ~size_t(0)) |   if (LUIdx != ~size_t(0)) | ||||||
| @@ -1545,8 +1563,9 @@ LSRInstance::OptimizeLoopTermCond() { | |||||||
|             !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { |             !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { | ||||||
|           // Conservatively assume there may be reuse if the quotient of their |           // Conservatively assume there may be reuse if the quotient of their | ||||||
|           // strides could be a legal scale. |           // strides could be a legal scale. | ||||||
|           const SCEV *A = CondUse->getStride(); |           const SCEV *A = CondUse->getStride(L); | ||||||
|           const SCEV *B = UI->getStride(); |           const SCEV *B = UI->getStride(L); | ||||||
|  |           if (!A || !B) continue; | ||||||
|           if (SE.getTypeSizeInBits(A->getType()) != |           if (SE.getTypeSizeInBits(A->getType()) != | ||||||
|               SE.getTypeSizeInBits(B->getType())) { |               SE.getTypeSizeInBits(B->getType())) { | ||||||
|             if (SE.getTypeSizeInBits(A->getType()) > |             if (SE.getTypeSizeInBits(A->getType()) > | ||||||
| @@ -1598,7 +1617,7 @@ LSRInstance::OptimizeLoopTermCond() { | |||||||
|         ExitingBlock->getInstList().insert(TermBr, Cond); |         ExitingBlock->getInstList().insert(TermBr, Cond); | ||||||
|  |  | ||||||
|         // Clone the IVUse, as the old use still exists! |         // Clone the IVUse, as the old use still exists! | ||||||
|         CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(), |         CondUse = &IU.AddUser(CondUse->getExpr(), | ||||||
|                               Cond, CondUse->getOperandValToReplace()); |                               Cond, CondUse->getOperandValToReplace()); | ||||||
|         TermBr->replaceUsesOfWith(OldCond, Cond); |         TermBr->replaceUsesOfWith(OldCond, Cond); | ||||||
|       } |       } | ||||||
| @@ -1607,9 +1626,7 @@ LSRInstance::OptimizeLoopTermCond() { | |||||||
|     // If we get to here, we know that we can transform the setcc instruction to |     // If we get to here, we know that we can transform the setcc instruction to | ||||||
|     // use the post-incremented version of the IV, allowing us to coalesce the |     // use the post-incremented version of the IV, allowing us to coalesce the | ||||||
|     // live ranges for the IV correctly. |     // live ranges for the IV correctly. | ||||||
|     CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), |     CondUse->transformToPostInc(L); | ||||||
|                                        CondUse->getStride())); |  | ||||||
|     CondUse->setIsUseOfPostIncrementedValue(true); |  | ||||||
|     Changed = true; |     Changed = true; | ||||||
|  |  | ||||||
|     PostIncs.insert(Cond); |     PostIncs.insert(Cond); | ||||||
| @@ -1717,19 +1734,24 @@ void LSRInstance::CollectInterestingTypesAndFactors() { | |||||||
|   SmallSetVector<const SCEV *, 4> Strides; |   SmallSetVector<const SCEV *, 4> Strides; | ||||||
|  |  | ||||||
|   // Collect interesting types and strides. |   // Collect interesting types and strides. | ||||||
|  |   SmallVector<const SCEV *, 4> Worklist; | ||||||
|   for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { |   for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { | ||||||
|     const SCEV *Stride = UI->getStride(); |     const SCEV *Expr = UI->getExpr(); | ||||||
|  |  | ||||||
|     // Collect interesting types. |     // Collect interesting types. | ||||||
|     Types.insert(SE.getEffectiveSCEVType(Stride->getType())); |     Types.insert(SE.getEffectiveSCEVType(Expr->getType())); | ||||||
|  |  | ||||||
|     // Add the stride for this loop. |     // Add strides for mentioned loops. | ||||||
|     Strides.insert(Stride); |     Worklist.push_back(Expr); | ||||||
|  |     do { | ||||||
|     // Add strides for other mentioned loops. |       const SCEV *S = Worklist.pop_back_val(); | ||||||
|     for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset()); |       if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { | ||||||
|          AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart())) |         Strides.insert(AR->getStepRecurrence(SE)); | ||||||
|       Strides.insert(AR->getStepRecurrence(SE)); |         Worklist.push_back(AR->getStart()); | ||||||
|  |       } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { | ||||||
|  |         Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end()); | ||||||
|  |       } | ||||||
|  |     } while (!Worklist.empty()); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // Compute interesting factors from the set of interesting strides. |   // Compute interesting factors from the set of interesting strides. | ||||||
| @@ -1776,8 +1798,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { | |||||||
|     LSRFixup &LF = getNewFixup(); |     LSRFixup &LF = getNewFixup(); | ||||||
|     LF.UserInst = UI->getUser(); |     LF.UserInst = UI->getUser(); | ||||||
|     LF.OperandValToReplace = UI->getOperandValToReplace(); |     LF.OperandValToReplace = UI->getOperandValToReplace(); | ||||||
|     if (UI->isUseOfPostIncrementedValue()) |     LF.PostIncLoops = UI->getPostIncLoops(); | ||||||
|       LF.PostIncLoop = L; |  | ||||||
|  |  | ||||||
|     LSRUse::KindType Kind = LSRUse::Basic; |     LSRUse::KindType Kind = LSRUse::Basic; | ||||||
|     const Type *AccessTy = 0; |     const Type *AccessTy = 0; | ||||||
| @@ -1786,7 +1807,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { | |||||||
|       AccessTy = getAccessType(LF.UserInst); |       AccessTy = getAccessType(LF.UserInst); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     const SCEV *S = IU.getCanonicalExpr(*UI); |     const SCEV *S = UI->getExpr(); | ||||||
|  |  | ||||||
|     // Equality (== and !=) ICmps are special. We can rewrite (i == N) as |     // Equality (== and !=) ICmps are special. We can rewrite (i == N) as | ||||||
|     // (N - i == 0), and this allows (N - i) to be the expression that we work |     // (N - i == 0), and this allows (N - i) to be the expression that we work | ||||||
| @@ -1824,7 +1845,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { | |||||||
|     LF.LUIdx = P.first; |     LF.LUIdx = P.first; | ||||||
|     LF.Offset = P.second; |     LF.Offset = P.second; | ||||||
|     LSRUse &LU = Uses[LF.LUIdx]; |     LSRUse &LU = Uses[LF.LUIdx]; | ||||||
|     LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst); |     LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); | ||||||
|  |  | ||||||
|     // If this is the first use of this LSRUse, give it a formula. |     // If this is the first use of this LSRUse, give it a formula. | ||||||
|     if (LU.Formulae.empty()) { |     if (LU.Formulae.empty()) { | ||||||
| @@ -1936,7 +1957,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { | |||||||
|         LF.LUIdx = P.first; |         LF.LUIdx = P.first; | ||||||
|         LF.Offset = P.second; |         LF.Offset = P.second; | ||||||
|         LSRUse &LU = Uses[LF.LUIdx]; |         LSRUse &LU = Uses[LF.LUIdx]; | ||||||
|         LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst); |         LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); | ||||||
|         InsertSupplementalFormula(U, LU, LF.LUIdx); |         InsertSupplementalFormula(U, LU, LF.LUIdx); | ||||||
|         CountRegisters(LU.Formulae.back(), Uses.size() - 1); |         CountRegisters(LU.Formulae.back(), Uses.size() - 1); | ||||||
|         break; |         break; | ||||||
| @@ -2783,8 +2804,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF, | |||||||
|                            SmallVectorImpl<WeakVH> &DeadInsts) const { |                            SmallVectorImpl<WeakVH> &DeadInsts) const { | ||||||
|   const LSRUse &LU = Uses[LF.LUIdx]; |   const LSRUse &LU = Uses[LF.LUIdx]; | ||||||
|  |  | ||||||
|   // Then, collect some instructions which we will remain dominated by when |   // Then, collect some instructions which must be dominated by the | ||||||
|   // expanding the replacement. These must be dominated by any operands that |   // expanding replacement. These must be dominated by any operands that | ||||||
|   // will be required in the expansion. |   // will be required in the expansion. | ||||||
|   SmallVector<Instruction *, 4> Inputs; |   SmallVector<Instruction *, 4> Inputs; | ||||||
|   if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace)) |   if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace)) | ||||||
| @@ -2793,8 +2814,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF, | |||||||
|     if (Instruction *I = |     if (Instruction *I = | ||||||
|           dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1))) |           dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1))) | ||||||
|       Inputs.push_back(I); |       Inputs.push_back(I); | ||||||
|   if (LF.PostIncLoop) { |   if (LF.PostIncLoops.count(L)) { | ||||||
|     if (!L->contains(LF.UserInst)) |     if (LF.isUseFullyOutsideLoop(L)) | ||||||
|       Inputs.push_back(L->getLoopLatch()->getTerminator()); |       Inputs.push_back(L->getLoopLatch()->getTerminator()); | ||||||
|     else |     else | ||||||
|       Inputs.push_back(IVIncInsertPos); |       Inputs.push_back(IVIncInsertPos); | ||||||
| @@ -2831,7 +2852,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, | |||||||
|  |  | ||||||
|   // Inform the Rewriter if we have a post-increment use, so that it can |   // Inform the Rewriter if we have a post-increment use, so that it can | ||||||
|   // perform an advantageous expansion. |   // perform an advantageous expansion. | ||||||
|   Rewriter.setPostInc(LF.PostIncLoop); |   Rewriter.setPostInc(LF.PostIncLoops); | ||||||
|  |  | ||||||
|   // This is the type that the user actually needs. |   // This is the type that the user actually needs. | ||||||
|   const Type *OpTy = LF.OperandValToReplace->getType(); |   const Type *OpTy = LF.OperandValToReplace->getType(); | ||||||
| @@ -2855,24 +2876,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, | |||||||
|     const SCEV *Reg = *I; |     const SCEV *Reg = *I; | ||||||
|     assert(!Reg->isZero() && "Zero allocated in a base register!"); |     assert(!Reg->isZero() && "Zero allocated in a base register!"); | ||||||
|  |  | ||||||
|     // If we're expanding for a post-inc user for the add-rec's loop, make the |     // If we're expanding for a post-inc user, make the post-inc adjustment. | ||||||
|     // post-inc adjustment. |     PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); | ||||||
|     const SCEV *Start = Reg; |     Reg = TransformForPostIncUse(Denormalize, Reg, | ||||||
|     while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) { |                                  LF.UserInst, LF.OperandValToReplace, | ||||||
|       if (AR->getLoop() == LF.PostIncLoop) { |                                  Loops, SE, DT); | ||||||
|         Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE)); |  | ||||||
|         // If the user is inside the loop, insert the code after the increment |  | ||||||
|         // so that it is dominated by its operand. If the original insert point |  | ||||||
|         // was already dominated by the increment, keep it, because there may |  | ||||||
|         // be loop-variant operands that need to be respected also. |  | ||||||
|         if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) { |  | ||||||
|           IP = IVIncInsertPos; |  | ||||||
|           while (isa<DbgInfoIntrinsic>(IP)) ++IP; |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|       } |  | ||||||
|       Start = AR->getStart(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); |     Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); | ||||||
|   } |   } | ||||||
| @@ -2889,11 +2897,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, | |||||||
|   if (F.AM.Scale != 0) { |   if (F.AM.Scale != 0) { | ||||||
|     const SCEV *ScaledS = F.ScaledReg; |     const SCEV *ScaledS = F.ScaledReg; | ||||||
|  |  | ||||||
|     // If we're expanding for a post-inc user for the add-rec's loop, make the |     // If we're expanding for a post-inc user, make the post-inc adjustment. | ||||||
|     // post-inc adjustment. |     PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); | ||||||
|     if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS)) |     ScaledS = TransformForPostIncUse(Denormalize, ScaledS, | ||||||
|       if (AR->getLoop() == LF.PostIncLoop) |                                      LF.UserInst, LF.OperandValToReplace, | ||||||
|         ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE)); |                                      Loops, SE, DT); | ||||||
|  |  | ||||||
|     if (LU.Kind == LSRUse::ICmpZero) { |     if (LU.Kind == LSRUse::ICmpZero) { | ||||||
|       // An interesting way of "folding" with an icmp is to use a negated |       // An interesting way of "folding" with an icmp is to use a negated | ||||||
| @@ -2954,7 +2962,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, | |||||||
|   Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP); |   Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP); | ||||||
|  |  | ||||||
|   // We're done expanding now, so reset the rewriter. |   // We're done expanding now, so reset the rewriter. | ||||||
|   Rewriter.setPostInc(0); |   Rewriter.clearPostInc(); | ||||||
|  |  | ||||||
|   // An ICmpZero Formula represents an ICmp which we're handling as a |   // An ICmpZero Formula represents an ICmp which we're handling as a | ||||||
|   // comparison against zero. Now that we've expanded an expression for that |   // comparison against zero. Now that we've expanded an expression for that | ||||||
|   | |||||||
							
								
								
									
										277
								test/CodeGen/X86/multiple-loop-post-inc.ll
										Normal file
									
								
							| @@ -0,0 +1,277 @@ | |||||||
|  | ; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s | ||||||
|  | ; rdar://7236213 | ||||||
|  |  | ||||||
|  | ; CodeGen shouldn't require any lea instructions inside the marked loop. | ||||||
|  | ; It should properly set up post-increment uses and do coalescing for | ||||||
|  | ; the induction variables. | ||||||
|  |  | ||||||
|  | ; CHECK: # Start | ||||||
|  | ; CHECK-NOT: lea | ||||||
|  | ; CHECK: # Stop | ||||||
|  |  | ||||||
|  | target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" | ||||||
|  |  | ||||||
|  | define void @foo(float* %I, i64 %IS, float* nocapture %Start, float* nocapture %Step, float* %O, i64 %OS, i64 %N) nounwind { | ||||||
|  | entry: | ||||||
|  |   %times4 = alloca float, align 4                 ; <float*> [#uses=3] | ||||||
|  |   %timesN = alloca float, align 4                 ; <float*> [#uses=2] | ||||||
|  |   %0 = load float* %Step, align 4                 ; <float> [#uses=8] | ||||||
|  |   %1 = ptrtoint float* %I to i64                  ; <i64> [#uses=1] | ||||||
|  |   %2 = ptrtoint float* %O to i64                  ; <i64> [#uses=1] | ||||||
|  |   %tmp = xor i64 %2, %1                           ; <i64> [#uses=1] | ||||||
|  |   %tmp16 = and i64 %tmp, 15                       ; <i64> [#uses=1] | ||||||
|  |   %3 = icmp eq i64 %tmp16, 0                      ; <i1> [#uses=1] | ||||||
|  |   %4 = trunc i64 %IS to i32                       ; <i32> [#uses=1] | ||||||
|  |   %5 = xor i32 %4, 1                              ; <i32> [#uses=1] | ||||||
|  |   %6 = trunc i64 %OS to i32                       ; <i32> [#uses=1] | ||||||
|  |   %7 = xor i32 %6, 1                              ; <i32> [#uses=1] | ||||||
|  |   %8 = or i32 %7, %5                              ; <i32> [#uses=1] | ||||||
|  |   %9 = icmp eq i32 %8, 0                          ; <i1> [#uses=1] | ||||||
|  |   br i1 %9, label %bb, label %return | ||||||
|  |  | ||||||
|  | bb:                                               ; preds = %entry | ||||||
|  |   %10 = load float* %Start, align 4               ; <float> [#uses=1] | ||||||
|  |   br label %bb2 | ||||||
|  |  | ||||||
|  | bb1:                                              ; preds = %bb3 | ||||||
|  |   %11 = load float* %I_addr.0, align 4            ; <float> [#uses=1] | ||||||
|  |   %12 = fmul float %11, %x.0                      ; <float> [#uses=1] | ||||||
|  |   store float %12, float* %O_addr.0, align 4 | ||||||
|  |   %13 = fadd float %x.0, %0                       ; <float> [#uses=1] | ||||||
|  |   %indvar.next53 = add i64 %14, 1                 ; <i64> [#uses=1] | ||||||
|  |   br label %bb2 | ||||||
|  |  | ||||||
|  | bb2:                                              ; preds = %bb1, %bb | ||||||
|  |   %14 = phi i64 [ %indvar.next53, %bb1 ], [ 0, %bb ] ; <i64> [#uses=21] | ||||||
|  |   %x.0 = phi float [ %13, %bb1 ], [ %10, %bb ]    ; <float> [#uses=6] | ||||||
|  |   %N_addr.0 = sub i64 %N, %14                     ; <i64> [#uses=4] | ||||||
|  |   %O_addr.0 = getelementptr float* %O, i64 %14    ; <float*> [#uses=4] | ||||||
|  |   %I_addr.0 = getelementptr float* %I, i64 %14    ; <float*> [#uses=3] | ||||||
|  |   %15 = icmp slt i64 %N_addr.0, 1                 ; <i1> [#uses=1] | ||||||
|  |   br i1 %15, label %bb4, label %bb3 | ||||||
|  |  | ||||||
|  | bb3:                                              ; preds = %bb2 | ||||||
|  |   %16 = ptrtoint float* %O_addr.0 to i64          ; <i64> [#uses=1] | ||||||
|  |   %17 = and i64 %16, 15                           ; <i64> [#uses=1] | ||||||
|  |   %18 = icmp eq i64 %17, 0                        ; <i1> [#uses=1] | ||||||
|  |   br i1 %18, label %bb4, label %bb1 | ||||||
|  |  | ||||||
|  | bb4:                                              ; preds = %bb3, %bb2 | ||||||
|  |   %19 = fmul float %0, 4.000000e+00               ; <float> [#uses=1] | ||||||
|  |   store float %19, float* %times4, align 4 | ||||||
|  |   %20 = fmul float %0, 1.600000e+01               ; <float> [#uses=1] | ||||||
|  |   store float %20, float* %timesN, align 4 | ||||||
|  |   %21 = fmul float %0, 0.000000e+00               ; <float> [#uses=1] | ||||||
|  |   %22 = fadd float %21, %x.0                      ; <float> [#uses=1] | ||||||
|  |   %23 = fadd float %x.0, %0                       ; <float> [#uses=1] | ||||||
|  |   %24 = fmul float %0, 2.000000e+00               ; <float> [#uses=1] | ||||||
|  |   %25 = fadd float %24, %x.0                      ; <float> [#uses=1] | ||||||
|  |   %26 = fmul float %0, 3.000000e+00               ; <float> [#uses=1] | ||||||
|  |   %27 = fadd float %26, %x.0                      ; <float> [#uses=1] | ||||||
|  |   %28 = insertelement <4 x float> undef, float %22, i32 0 ; <<4 x float>> [#uses=1] | ||||||
|  |   %29 = insertelement <4 x float> %28, float %23, i32 1 ; <<4 x float>> [#uses=1] | ||||||
|  |   %30 = insertelement <4 x float> %29, float %25, i32 2 ; <<4 x float>> [#uses=1] | ||||||
|  |   %31 = insertelement <4 x float> %30, float %27, i32 3 ; <<4 x float>> [#uses=5] | ||||||
|  |   %asmtmp.i = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %times4) nounwind ; <<4 x float>> [#uses=3] | ||||||
|  |   %32 = fadd <4 x float> %31, %asmtmp.i           ; <<4 x float>> [#uses=3] | ||||||
|  |   %33 = fadd <4 x float> %32, %asmtmp.i           ; <<4 x float>> [#uses=3] | ||||||
|  |   %34 = fadd <4 x float> %33, %asmtmp.i           ; <<4 x float>> [#uses=2] | ||||||
|  |   %asmtmp.i18 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %timesN) nounwind ; <<4 x float>> [#uses=8] | ||||||
|  |   %35 = icmp sgt i64 %N_addr.0, 15                ; <i1> [#uses=2] | ||||||
|  |   br i1 %3, label %bb6.preheader, label %bb8 | ||||||
|  |  | ||||||
|  | bb6.preheader:                                    ; preds = %bb4 | ||||||
|  |   br i1 %35, label %bb.nph43, label %bb7 | ||||||
|  |  | ||||||
|  | bb.nph43:                                         ; preds = %bb6.preheader | ||||||
|  |   %tmp108 = add i64 %14, 16                       ; <i64> [#uses=1] | ||||||
|  |   %tmp111 = add i64 %14, 4                        ; <i64> [#uses=1] | ||||||
|  |   %tmp115 = add i64 %14, 8                        ; <i64> [#uses=1] | ||||||
|  |   %tmp119 = add i64 %14, 12                       ; <i64> [#uses=1] | ||||||
|  |   %tmp134 = add i64 %N, -16                       ; <i64> [#uses=1] | ||||||
|  |   %tmp135 = sub i64 %tmp134, %14                  ; <i64> [#uses=1] | ||||||
|  |   call void asm sideeffect "# Start.", "~{dirflag},~{fpsr},~{flags}"() nounwind | ||||||
|  |   br label %bb5 | ||||||
|  |  | ||||||
|  | bb5:                                              ; preds = %bb.nph43, %bb5 | ||||||
|  |   %indvar102 = phi i64 [ 0, %bb.nph43 ], [ %indvar.next103, %bb5 ] ; <i64> [#uses=3] | ||||||
|  |   %vX3.041 = phi <4 x float> [ %34, %bb.nph43 ], [ %45, %bb5 ] ; <<4 x float>> [#uses=2] | ||||||
|  |   %vX0.039 = phi <4 x float> [ %31, %bb.nph43 ], [ %41, %bb5 ] ; <<4 x float>> [#uses=2] | ||||||
|  |   %vX2.037 = phi <4 x float> [ %33, %bb.nph43 ], [ %46, %bb5 ] ; <<4 x float>> [#uses=2] | ||||||
|  |   %vX1.036 = phi <4 x float> [ %32, %bb.nph43 ], [ %47, %bb5 ] ; <<4 x float>> [#uses=2] | ||||||
|  |   %tmp104 = shl i64 %indvar102, 4                 ; <i64> [#uses=5] | ||||||
|  |   %tmp105 = add i64 %14, %tmp104                  ; <i64> [#uses=2] | ||||||
|  |   %scevgep106 = getelementptr float* %I, i64 %tmp105 ; <float*> [#uses=1] | ||||||
|  |   %scevgep106107 = bitcast float* %scevgep106 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %tmp109 = add i64 %tmp108, %tmp104              ; <i64> [#uses=2] | ||||||
|  |   %tmp112 = add i64 %tmp111, %tmp104              ; <i64> [#uses=2] | ||||||
|  |   %scevgep113 = getelementptr float* %I, i64 %tmp112 ; <float*> [#uses=1] | ||||||
|  |   %scevgep113114 = bitcast float* %scevgep113 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %tmp116 = add i64 %tmp115, %tmp104              ; <i64> [#uses=2] | ||||||
|  |   %scevgep117 = getelementptr float* %I, i64 %tmp116 ; <float*> [#uses=1] | ||||||
|  |   %scevgep117118 = bitcast float* %scevgep117 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %tmp120 = add i64 %tmp119, %tmp104              ; <i64> [#uses=2] | ||||||
|  |   %scevgep121 = getelementptr float* %I, i64 %tmp120 ; <float*> [#uses=1] | ||||||
|  |   %scevgep121122 = bitcast float* %scevgep121 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %scevgep123 = getelementptr float* %O, i64 %tmp105 ; <float*> [#uses=1] | ||||||
|  |   %scevgep123124 = bitcast float* %scevgep123 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %scevgep126 = getelementptr float* %O, i64 %tmp112 ; <float*> [#uses=1] | ||||||
|  |   %scevgep126127 = bitcast float* %scevgep126 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %scevgep128 = getelementptr float* %O, i64 %tmp116 ; <float*> [#uses=1] | ||||||
|  |   %scevgep128129 = bitcast float* %scevgep128 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %scevgep130 = getelementptr float* %O, i64 %tmp120 ; <float*> [#uses=1] | ||||||
|  |   %scevgep130131 = bitcast float* %scevgep130 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %tmp132 = mul i64 %indvar102, -16               ; <i64> [#uses=1] | ||||||
|  |   %tmp136 = add i64 %tmp135, %tmp132              ; <i64> [#uses=2] | ||||||
|  |   %36 = load <4 x float>* %scevgep106107, align 16 ; <<4 x float>> [#uses=1] | ||||||
|  |   %37 = load <4 x float>* %scevgep113114, align 16 ; <<4 x float>> [#uses=1] | ||||||
|  |   %38 = load <4 x float>* %scevgep117118, align 16 ; <<4 x float>> [#uses=1] | ||||||
|  |   %39 = load <4 x float>* %scevgep121122, align 16 ; <<4 x float>> [#uses=1] | ||||||
|  |   %40 = fmul <4 x float> %36, %vX0.039            ; <<4 x float>> [#uses=1] | ||||||
|  |   %41 = fadd <4 x float> %vX0.039, %asmtmp.i18    ; <<4 x float>> [#uses=2] | ||||||
|  |   %42 = fmul <4 x float> %37, %vX1.036            ; <<4 x float>> [#uses=1] | ||||||
|  |   %43 = fmul <4 x float> %38, %vX2.037            ; <<4 x float>> [#uses=1] | ||||||
|  |   %44 = fmul <4 x float> %39, %vX3.041            ; <<4 x float>> [#uses=1] | ||||||
|  |   store <4 x float> %40, <4 x float>* %scevgep123124, align 16 | ||||||
|  |   store <4 x float> %42, <4 x float>* %scevgep126127, align 16 | ||||||
|  |   store <4 x float> %43, <4 x float>* %scevgep128129, align 16 | ||||||
|  |   store <4 x float> %44, <4 x float>* %scevgep130131, align 16 | ||||||
|  |   %45 = fadd <4 x float> %vX3.041, %asmtmp.i18    ; <<4 x float>> [#uses=1] | ||||||
|  |   %46 = fadd <4 x float> %vX2.037, %asmtmp.i18    ; <<4 x float>> [#uses=1] | ||||||
|  |   %47 = fadd <4 x float> %vX1.036, %asmtmp.i18    ; <<4 x float>> [#uses=1] | ||||||
|  |   %48 = icmp sgt i64 %tmp136, 15                  ; <i1> [#uses=1] | ||||||
|  |   %indvar.next103 = add i64 %indvar102, 1         ; <i64> [#uses=1] | ||||||
|  |   br i1 %48, label %bb5, label %bb6.bb7_crit_edge | ||||||
|  |  | ||||||
|  | bb6.bb7_crit_edge:                                ; preds = %bb5 | ||||||
|  |   call void asm sideeffect "# Stop.", "~{dirflag},~{fpsr},~{flags}"() nounwind | ||||||
|  |   %scevgep110 = getelementptr float* %I, i64 %tmp109 ; <float*> [#uses=1] | ||||||
|  |   %scevgep125 = getelementptr float* %O, i64 %tmp109 ; <float*> [#uses=1] | ||||||
|  |   br label %bb7 | ||||||
|  |  | ||||||
|  | bb7:                                              ; preds = %bb6.bb7_crit_edge, %bb6.preheader | ||||||
|  |   %I_addr.1.lcssa = phi float* [ %scevgep110, %bb6.bb7_crit_edge ], [ %I_addr.0, %bb6.preheader ] ; <float*> [#uses=1] | ||||||
|  |   %O_addr.1.lcssa = phi float* [ %scevgep125, %bb6.bb7_crit_edge ], [ %O_addr.0, %bb6.preheader ] ; <float*> [#uses=1] | ||||||
|  |   %vX0.0.lcssa = phi <4 x float> [ %41, %bb6.bb7_crit_edge ], [ %31, %bb6.preheader ] ; <<4 x float>> [#uses=1] | ||||||
|  |   %N_addr.1.lcssa = phi i64 [ %tmp136, %bb6.bb7_crit_edge ], [ %N_addr.0, %bb6.preheader ] ; <i64> [#uses=1] | ||||||
|  |   %asmtmp.i17 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %times4) nounwind ; <<4 x float>> [#uses=0] | ||||||
|  |   br label %bb11 | ||||||
|  |  | ||||||
|  | bb8:                                              ; preds = %bb4 | ||||||
|  |   br i1 %35, label %bb.nph, label %bb11 | ||||||
|  |  | ||||||
|  | bb.nph:                                           ; preds = %bb8 | ||||||
|  |   %I_addr.0.sum = add i64 %14, -1                 ; <i64> [#uses=1] | ||||||
|  |   %49 = getelementptr inbounds float* %I, i64 %I_addr.0.sum ; <float*> [#uses=1] | ||||||
|  |   %50 = bitcast float* %49 to <4 x float>*        ; <<4 x float>*> [#uses=1] | ||||||
|  |   %51 = load <4 x float>* %50, align 16           ; <<4 x float>> [#uses=1] | ||||||
|  |   %tmp54 = add i64 %14, 16                        ; <i64> [#uses=1] | ||||||
|  |   %tmp56 = add i64 %14, 3                         ; <i64> [#uses=1] | ||||||
|  |   %tmp60 = add i64 %14, 7                         ; <i64> [#uses=1] | ||||||
|  |   %tmp64 = add i64 %14, 11                        ; <i64> [#uses=1] | ||||||
|  |   %tmp68 = add i64 %14, 15                        ; <i64> [#uses=1] | ||||||
|  |   %tmp76 = add i64 %14, 4                         ; <i64> [#uses=1] | ||||||
|  |   %tmp80 = add i64 %14, 8                         ; <i64> [#uses=1] | ||||||
|  |   %tmp84 = add i64 %14, 12                        ; <i64> [#uses=1] | ||||||
|  |   %tmp90 = add i64 %N, -16                        ; <i64> [#uses=1] | ||||||
|  |   %tmp91 = sub i64 %tmp90, %14                    ; <i64> [#uses=1] | ||||||
|  |   br label %bb9 | ||||||
|  |  | ||||||
|  | bb9:                                              ; preds = %bb.nph, %bb9 | ||||||
|  |   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb9 ] ; <i64> [#uses=3] | ||||||
|  |   %vX3.125 = phi <4 x float> [ %34, %bb.nph ], [ %69, %bb9 ] ; <<4 x float>> [#uses=2] | ||||||
|  |   %vX0.223 = phi <4 x float> [ %31, %bb.nph ], [ %65, %bb9 ] ; <<4 x float>> [#uses=2] | ||||||
|  |   %vX2.121 = phi <4 x float> [ %33, %bb.nph ], [ %70, %bb9 ] ; <<4 x float>> [#uses=2] | ||||||
|  |   %vX1.120 = phi <4 x float> [ %32, %bb.nph ], [ %71, %bb9 ] ; <<4 x float>> [#uses=2] | ||||||
|  |   %vI0.019 = phi <4 x float> [ %51, %bb.nph ], [ %55, %bb9 ] ; <<4 x float>> [#uses=1] | ||||||
|  |   %tmp51 = shl i64 %indvar, 4                     ; <i64> [#uses=9] | ||||||
|  |   %tmp55 = add i64 %tmp54, %tmp51                 ; <i64> [#uses=2] | ||||||
|  |   %tmp57 = add i64 %tmp56, %tmp51                 ; <i64> [#uses=1] | ||||||
|  |   %scevgep58 = getelementptr float* %I, i64 %tmp57 ; <float*> [#uses=1] | ||||||
|  |   %scevgep5859 = bitcast float* %scevgep58 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %tmp61 = add i64 %tmp60, %tmp51                 ; <i64> [#uses=1] | ||||||
|  |   %scevgep62 = getelementptr float* %I, i64 %tmp61 ; <float*> [#uses=1] | ||||||
|  |   %scevgep6263 = bitcast float* %scevgep62 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %tmp65 = add i64 %tmp64, %tmp51                 ; <i64> [#uses=1] | ||||||
|  |   %scevgep66 = getelementptr float* %I, i64 %tmp65 ; <float*> [#uses=1] | ||||||
|  |   %scevgep6667 = bitcast float* %scevgep66 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %tmp69 = add i64 %tmp68, %tmp51                 ; <i64> [#uses=1] | ||||||
|  |   %scevgep70 = getelementptr float* %I, i64 %tmp69 ; <float*> [#uses=1] | ||||||
|  |   %scevgep7071 = bitcast float* %scevgep70 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %tmp72 = add i64 %14, %tmp51                    ; <i64> [#uses=1] | ||||||
|  |   %scevgep73 = getelementptr float* %O, i64 %tmp72 ; <float*> [#uses=1] | ||||||
|  |   %scevgep7374 = bitcast float* %scevgep73 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %tmp77 = add i64 %tmp76, %tmp51                 ; <i64> [#uses=1] | ||||||
|  |   %scevgep78 = getelementptr float* %O, i64 %tmp77 ; <float*> [#uses=1] | ||||||
|  |   %scevgep7879 = bitcast float* %scevgep78 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %tmp81 = add i64 %tmp80, %tmp51                 ; <i64> [#uses=1] | ||||||
|  |   %scevgep82 = getelementptr float* %O, i64 %tmp81 ; <float*> [#uses=1] | ||||||
|  |   %scevgep8283 = bitcast float* %scevgep82 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %tmp85 = add i64 %tmp84, %tmp51                 ; <i64> [#uses=1] | ||||||
|  |   %scevgep86 = getelementptr float* %O, i64 %tmp85 ; <float*> [#uses=1] | ||||||
|  |   %scevgep8687 = bitcast float* %scevgep86 to <4 x float>* ; <<4 x float>*> [#uses=1] | ||||||
|  |   %tmp88 = mul i64 %indvar, -16                   ; <i64> [#uses=1] | ||||||
|  |   %tmp92 = add i64 %tmp91, %tmp88                 ; <i64> [#uses=2] | ||||||
|  |   %52 = load <4 x float>* %scevgep5859, align 16  ; <<4 x float>> [#uses=2] | ||||||
|  |   %53 = load <4 x float>* %scevgep6263, align 16  ; <<4 x float>> [#uses=2] | ||||||
|  |   %54 = load <4 x float>* %scevgep6667, align 16  ; <<4 x float>> [#uses=2] | ||||||
|  |   %55 = load <4 x float>* %scevgep7071, align 16  ; <<4 x float>> [#uses=2] | ||||||
|  |   %56 = shufflevector <4 x float> %vI0.019, <4 x float> %52, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] | ||||||
|  |   %57 = shufflevector <4 x float> %56, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] | ||||||
|  |   %58 = shufflevector <4 x float> %52, <4 x float> %53, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] | ||||||
|  |   %59 = shufflevector <4 x float> %58, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] | ||||||
|  |   %60 = shufflevector <4 x float> %53, <4 x float> %54, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] | ||||||
|  |   %61 = shufflevector <4 x float> %60, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] | ||||||
|  |   %62 = shufflevector <4 x float> %54, <4 x float> %55, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] | ||||||
|  |   %63 = shufflevector <4 x float> %62, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] | ||||||
|  |   %64 = fmul <4 x float> %57, %vX0.223            ; <<4 x float>> [#uses=1] | ||||||
|  |   %65 = fadd <4 x float> %vX0.223, %asmtmp.i18    ; <<4 x float>> [#uses=2] | ||||||
|  |   %66 = fmul <4 x float> %59, %vX1.120            ; <<4 x float>> [#uses=1] | ||||||
|  |   %67 = fmul <4 x float> %61, %vX2.121            ; <<4 x float>> [#uses=1] | ||||||
|  |   %68 = fmul <4 x float> %63, %vX3.125            ; <<4 x float>> [#uses=1] | ||||||
|  |   store <4 x float> %64, <4 x float>* %scevgep7374, align 16 | ||||||
|  |   store <4 x float> %66, <4 x float>* %scevgep7879, align 16 | ||||||
|  |   store <4 x float> %67, <4 x float>* %scevgep8283, align 16 | ||||||
|  |   store <4 x float> %68, <4 x float>* %scevgep8687, align 16 | ||||||
|  |   %69 = fadd <4 x float> %vX3.125, %asmtmp.i18    ; <<4 x float>> [#uses=1] | ||||||
|  |   %70 = fadd <4 x float> %vX2.121, %asmtmp.i18    ; <<4 x float>> [#uses=1] | ||||||
|  |   %71 = fadd <4 x float> %vX1.120, %asmtmp.i18    ; <<4 x float>> [#uses=1] | ||||||
|  |   %72 = icmp sgt i64 %tmp92, 15                   ; <i1> [#uses=1] | ||||||
|  |   %indvar.next = add i64 %indvar, 1               ; <i64> [#uses=1] | ||||||
|  |   br i1 %72, label %bb9, label %bb10.bb11.loopexit_crit_edge | ||||||
|  |  | ||||||
|  | bb10.bb11.loopexit_crit_edge:                     ; preds = %bb9 | ||||||
|  |   %scevgep = getelementptr float* %I, i64 %tmp55  ; <float*> [#uses=1] | ||||||
|  |   %scevgep75 = getelementptr float* %O, i64 %tmp55 ; <float*> [#uses=1] | ||||||
|  |   br label %bb11 | ||||||
|  |  | ||||||
|  | bb11:                                             ; preds = %bb8, %bb10.bb11.loopexit_crit_edge, %bb7 | ||||||
|  |   %N_addr.2 = phi i64 [ %N_addr.1.lcssa, %bb7 ], [ %tmp92, %bb10.bb11.loopexit_crit_edge ], [ %N_addr.0, %bb8 ] ; <i64> [#uses=2] | ||||||
|  |   %vX0.1 = phi <4 x float> [ %vX0.0.lcssa, %bb7 ], [ %65, %bb10.bb11.loopexit_crit_edge ], [ %31, %bb8 ] ; <<4 x float>> [#uses=1] | ||||||
|  |   %O_addr.2 = phi float* [ %O_addr.1.lcssa, %bb7 ], [ %scevgep75, %bb10.bb11.loopexit_crit_edge ], [ %O_addr.0, %bb8 ] ; <float*> [#uses=1] | ||||||
|  |   %I_addr.2 = phi float* [ %I_addr.1.lcssa, %bb7 ], [ %scevgep, %bb10.bb11.loopexit_crit_edge ], [ %I_addr.0, %bb8 ] ; <float*> [#uses=1] | ||||||
|  |   %73 = extractelement <4 x float> %vX0.1, i32 0  ; <float> [#uses=2] | ||||||
|  |   %74 = icmp sgt i64 %N_addr.2, 0                 ; <i1> [#uses=1] | ||||||
|  |   br i1 %74, label %bb12, label %bb14 | ||||||
|  |  | ||||||
|  | bb12:                                             ; preds = %bb11, %bb12 | ||||||
|  |   %indvar94 = phi i64 [ %indvar.next95, %bb12 ], [ 0, %bb11 ] ; <i64> [#uses=3] | ||||||
|  |   %x.130 = phi float [ %77, %bb12 ], [ %73, %bb11 ] ; <float> [#uses=2] | ||||||
|  |   %I_addr.433 = getelementptr float* %I_addr.2, i64 %indvar94 ; <float*> [#uses=1] | ||||||
|  |   %O_addr.432 = getelementptr float* %O_addr.2, i64 %indvar94 ; <float*> [#uses=1] | ||||||
|  |   %75 = load float* %I_addr.433, align 4          ; <float> [#uses=1] | ||||||
|  |   %76 = fmul float %75, %x.130                    ; <float> [#uses=1] | ||||||
|  |   store float %76, float* %O_addr.432, align 4 | ||||||
|  |   %77 = fadd float %x.130, %0                     ; <float> [#uses=2] | ||||||
|  |   %indvar.next95 = add i64 %indvar94, 1           ; <i64> [#uses=2] | ||||||
|  |   %exitcond = icmp eq i64 %indvar.next95, %N_addr.2 ; <i1> [#uses=1] | ||||||
|  |   br i1 %exitcond, label %bb14, label %bb12 | ||||||
|  |  | ||||||
|  | bb14:                                             ; preds = %bb12, %bb11 | ||||||
|  |   %x.1.lcssa = phi float [ %73, %bb11 ], [ %77, %bb12 ] ; <float> [#uses=1] | ||||||
|  |   store float %x.1.lcssa, float* %Start, align 4 | ||||||
|  |   ret void | ||||||
|  |  | ||||||
|  | return:                                           ; preds = %entry | ||||||
|  |   ret void | ||||||
|  | } | ||||||
| @@ -1,4 +1,4 @@ | |||||||
| ; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc)} | ; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc with loop %loop)} | ||||||
|  |  | ||||||
| ; The value of %r is dependent on a polynomial iteration expression. | ; The value of %r is dependent on a polynomial iteration expression. | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user