Generalize IVUsers to track arbitrary expressions rather than expressions

explicitly split into stride-and-offset pairs. Also, add the
ability to track multiple post-increment loops on the same expression.

This refines the concept of "normalizing" SCEV expressions used for
post-increment uses, and introduces a dedicated utility routine for
normalizing and denormalizing expressions.

This fixes the expansion of expressions which are post-increment users
of more than one loop at a time. More broadly, this takes LSR another
step closer to being able to reason about more than one loop at a time.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100699 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dan Gohman
2010-04-07 22:27:08 +00:00
parent b72e59e361
commit 448db1cdef
10 changed files with 743 additions and 279 deletions

View File

@@ -16,6 +16,7 @@
#define LLVM_ANALYSIS_IVUSERS_H #define LLVM_ANALYSIS_IVUSERS_H
#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Support/ValueHandle.h" #include "llvm/Support/ValueHandle.h"
namespace llvm { namespace llvm {
@@ -26,17 +27,18 @@ class Value;
class IVUsers; class IVUsers;
class ScalarEvolution; class ScalarEvolution;
class SCEV; class SCEV;
class IVUsers;
/// IVStrideUse - Keep track of one use of a strided induction variable. /// IVStrideUse - Keep track of one use of a strided induction variable.
/// The Expr member keeps track of the expression, User is the actual user /// The Expr member keeps track of the expression, User is the actual user
/// instruction of the operand, and 'OperandValToReplace' is the operand of /// instruction of the operand, and 'OperandValToReplace' is the operand of
/// the User that is the use. /// the User that is the use.
class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> { class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> {
friend class IVUsers;
public: public:
IVStrideUse(IVUsers *P, const SCEV *S, const SCEV *Off, IVStrideUse(IVUsers *P, const SCEV *E,
Instruction* U, Value *O) Instruction* U, Value *O)
: CallbackVH(U), Parent(P), Stride(S), Offset(Off), : CallbackVH(U), Parent(P), Expr(E), OperandValToReplace(O) {
OperandValToReplace(O), IsUseOfPostIncrementedValue(false) {
} }
/// getUser - Return the user instruction for this use. /// getUser - Return the user instruction for this use.
@@ -53,23 +55,15 @@ public:
/// this IVStrideUse. /// this IVStrideUse.
IVUsers *getParent() const { return Parent; } IVUsers *getParent() const { return Parent; }
/// getStride - Return the expression for the stride for the use. /// getExpr - Return the expression for the use.
const SCEV *getStride() const { return Stride; } const SCEV *getExpr() const { return Expr; }
/// setStride - Assign a new stride to this use. /// setExpr - Assign a new expression to this use.
void setStride(const SCEV *Val) { void setExpr(const SCEV *Val) {
Stride = Val; Expr = Val;
} }
/// getOffset - Return the offset to add to a theoretical induction const SCEV *getStride(const Loop *L) const;
/// variable that starts at zero and counts up by the stride to compute
/// the value for the use. This always has the same type as the stride.
const SCEV *getOffset() const { return Offset; }
/// setOffset - Assign a new offset to this use.
void setOffset(const SCEV *Val) {
Offset = Val;
}
/// getOperandValToReplace - Return the Value of the operand in the user /// getOperandValToReplace - Return the Value of the operand in the user
/// instruction that this IVStrideUse is representing. /// instruction that this IVStrideUse is representing.
@@ -83,37 +77,30 @@ public:
OperandValToReplace = Op; OperandValToReplace = Op;
} }
/// isUseOfPostIncrementedValue - True if this should use the /// getPostIncLoops - Return the set of loops for which the expression has
/// post-incremented version of this IV, not the preincremented version. /// been adjusted to use post-inc mode.
/// This can only be set in special cases, such as the terminating setcc const PostIncLoopSet &getPostIncLoops() const {
/// instruction for a loop or uses dominated by the loop. return PostIncLoops;
bool isUseOfPostIncrementedValue() const {
return IsUseOfPostIncrementedValue;
} }
/// setIsUseOfPostIncrmentedValue - set the flag that indicates whether /// transformToPostInc - Transform the expression to post-inc form for the
/// this is a post-increment use. /// given loop.
void setIsUseOfPostIncrementedValue(bool Val) { void transformToPostInc(const Loop *L);
IsUseOfPostIncrementedValue = Val;
}
private: private:
/// Parent - a pointer to the IVUsers that owns this IVStrideUse. /// Parent - a pointer to the IVUsers that owns this IVStrideUse.
IVUsers *Parent; IVUsers *Parent;
/// Stride - The stride for this use. /// Expr - The expression for this use.
const SCEV *Stride; const SCEV *Expr;
/// Offset - The offset to add to the base induction expression.
const SCEV *Offset;
/// OperandValToReplace - The Value of the operand in the user instruction /// OperandValToReplace - The Value of the operand in the user instruction
/// that this IVStrideUse is representing. /// that this IVStrideUse is representing.
WeakVH OperandValToReplace; WeakVH OperandValToReplace;
/// IsUseOfPostIncrementedValue - True if this should use the /// PostIncLoops - The set of loops for which Expr has been adjusted to
/// post-incremented version of this IV, not the preincremented version. /// use post-inc mode. This corresponds with SCEVExpander's post-inc concept.
bool IsUseOfPostIncrementedValue; PostIncLoopSet PostIncLoops;
/// Deleted - Implementation of CallbackVH virtual function to /// Deleted - Implementation of CallbackVH virtual function to
/// receive notification when the User is deleted. /// receive notification when the User is deleted.
@@ -174,18 +161,13 @@ public:
/// return true. Otherwise, return false. /// return true. Otherwise, return false.
bool AddUsersIfInteresting(Instruction *I); bool AddUsersIfInteresting(Instruction *I);
IVStrideUse &AddUser(const SCEV *Stride, const SCEV *Offset, IVStrideUse &AddUser(const SCEV *Expr,
Instruction *User, Value *Operand); Instruction *User, Value *Operand);
/// getReplacementExpr - Return a SCEV expression which computes the /// getReplacementExpr - Return a SCEV expression which computes the
/// value of the OperandValToReplace of the given IVStrideUse. /// value of the OperandValToReplace of the given IVStrideUse.
const SCEV *getReplacementExpr(const IVStrideUse &U) const; const SCEV *getReplacementExpr(const IVStrideUse &U) const;
/// getCanonicalExpr - Return a SCEV expression which computes the
/// value of the SCEV of the given IVStrideUse, ignoring the
/// isUseOfPostIncrementedValue flag.
const SCEV *getCanonicalExpr(const IVStrideUse &U) const;
typedef ilist<IVStrideUse>::iterator iterator; typedef ilist<IVStrideUse>::iterator iterator;
typedef ilist<IVStrideUse>::const_iterator const_iterator; typedef ilist<IVStrideUse>::const_iterator const_iterator;
iterator begin() { return IVUses.begin(); } iterator begin() { return IVUses.begin(); }

View File

@@ -15,6 +15,7 @@
#define LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H #define LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H
#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Support/IRBuilder.h" #include "llvm/Support/IRBuilder.h"
#include "llvm/Support/TargetFolder.h" #include "llvm/Support/TargetFolder.h"
#include <set> #include <set>
@@ -32,12 +33,12 @@ namespace llvm {
InsertedExpressions; InsertedExpressions;
std::set<Value*> InsertedValues; std::set<Value*> InsertedValues;
/// PostIncLoop - When non-null, expanded addrecs referring to the given /// PostIncLoops - Addrecs referring to any of the given loops are expanded
/// loop expanded in post-inc mode. For example, expanding {1,+,1}<L> in /// in post-inc mode. For example, expanding {1,+,1}<L> in post-inc mode
/// post-inc mode returns the add instruction that adds one to the phi /// returns the add instruction that adds one to the phi for {0,+,1}<L>,
/// for {0,+,1}<L>, as opposed to a new phi starting at 1. This is only /// as opposed to a new phi starting at 1. This is only supported in
/// supported in non-canonical mode. /// non-canonical mode.
const Loop *PostIncLoop; PostIncLoopSet PostIncLoops;
/// IVIncInsertPos - When this is non-null, addrecs expanded in the /// IVIncInsertPos - When this is non-null, addrecs expanded in the
/// loop it indicates should be inserted with increments at /// loop it indicates should be inserted with increments at
@@ -62,7 +63,7 @@ namespace llvm {
public: public:
/// SCEVExpander - Construct a SCEVExpander in "canonical" mode. /// SCEVExpander - Construct a SCEVExpander in "canonical" mode.
explicit SCEVExpander(ScalarEvolution &se) explicit SCEVExpander(ScalarEvolution &se)
: SE(se), PostIncLoop(0), IVIncInsertLoop(0), CanonicalMode(true), : SE(se), IVIncInsertLoop(0), CanonicalMode(true),
Builder(se.getContext(), TargetFolder(se.TD)) {} Builder(se.getContext(), TargetFolder(se.TD)) {}
/// clear - Erase the contents of the InsertedExpressions map so that users /// clear - Erase the contents of the InsertedExpressions map so that users
@@ -89,14 +90,18 @@ namespace llvm {
IVIncInsertPos = Pos; IVIncInsertPos = Pos;
} }
/// setPostInc - If L is non-null, enable post-inc expansion for addrecs /// setPostInc - Enable post-inc expansion for addrecs referring to the
/// referring to the given loop. If L is null, disable post-inc expansion /// given loops. Post-inc expansion is only supported in non-canonical
/// completely. Post-inc expansion is only supported in non-canonical
/// mode. /// mode.
void setPostInc(const Loop *L) { void setPostInc(const PostIncLoopSet &L) {
assert(!CanonicalMode && assert(!CanonicalMode &&
"Post-inc expansion is not supported in CanonicalMode"); "Post-inc expansion is not supported in CanonicalMode");
PostIncLoop = L; PostIncLoops = L;
}
/// clearPostInc - Disable all post-inc expansion.
void clearPostInc() {
PostIncLoops.clear();
} }
/// disableCanonicalMode - Disable the behavior of expanding expressions in /// disableCanonicalMode - Disable the behavior of expanding expressions in

View File

@@ -0,0 +1,78 @@
//===- llvm/Analysis/ScalarEvolutionNormalization.h - See below -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines utilities for working with "normalized" ScalarEvolution
// expressions.
//
// The following example illustrates post-increment uses and how normalized
// expressions help.
//
// for (i=0; i!=n; ++i) {
// ...
// }
// use(i);
//
// While the expression for most uses of i inside the loop is {0,+,1}<%L>, the
// expression for the use of i outside the loop is {1,+,1}<%L>, since i is
// incremented at the end of the loop body. This is inconvenient, since it
// suggests that we need two different induction variables, one that starts
// at 0 and one that starts at 1. We'd prefer to be able to think of these as
// the same induction variable, with uses inside the loop using the
// "pre-incremented" value, and uses after the loop using the
// "post-incremented" value.
//
// Expressions for post-incremented uses are represented as an expression
// paired with a set of loops for which the expression is in "post-increment"
// mode (there may be multiple loops).
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H
#define LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H
#include "llvm/ADT/SmallPtrSet.h"
namespace llvm {
class Instruction;
class DominatorTree;
class Loop;
class ScalarEvolution;
class SCEV;
class Value;
/// TransformKind - Different types of transformations that
/// TransformForPostIncUse can do. The kind selects how addrec
/// subexpressions are adjusted relative to the set of post-inc loops.
enum TransformKind {
/// Normalize - Normalize according to the given loops. For each addrec
/// whose loop is in the given set, the addrec's step is subtracted from it.
Normalize,
/// NormalizeAutodetect - Detect post-inc opportunities on new expressions,
/// update the given loop set, and normalize. Loops found to be post-inc for
/// the given user are inserted into the set as a side effect.
NormalizeAutodetect,
/// Denormalize - Perform the inverse transform on the expression with the
/// given loop set. For each addrec whose loop is in the given set, the
/// addrec's step is added back to it.
Denormalize
};
/// PostIncLoopSet - A set of loops. It is paired with an expression to record
/// the loops for which that expression is in "post-increment" mode (there may
/// be multiple loops).
typedef SmallPtrSet<const Loop *, 2> PostIncLoopSet;
/// TransformForPostIncUse - Transform the given expression according to the
/// given transformation kind.
///
/// \param Kind  Which transformation to apply (see TransformKind).
/// \param S     The expression to transform.
/// \param User  The instruction using the value; consulted in
///              NormalizeAutodetect mode to decide whether the use should
///              see the post-incremented value.
/// \param OperandValToReplace  The operand of User being described; also
///              consulted in NormalizeAutodetect mode.
/// \param Loops The set of post-inc loops. It is read in Normalize and
///              Denormalize modes and added to in NormalizeAutodetect mode.
/// \param SE    The ScalarEvolution instance used to build new expressions.
/// \param DT    The dominator tree used for post-inc detection.
/// \returns the transformed expression.
const SCEV *TransformForPostIncUse(TransformKind Kind,
const SCEV *S,
Instruction *User,
Value *OperandValToReplace,
PostIncLoopSet &Loops,
ScalarEvolution &SE,
DominatorTree &DT);
}
#endif

View File

@@ -62,120 +62,34 @@ static void CollectSubexprs(const SCEV *S,
Ops.push_back(S); Ops.push_back(S);
} }
/// getSCEVStartAndStride - Compute the start and stride of this expression, /// isInteresting - Test whether the given expression is "interesting" when
/// returning false if the expression is not a start/stride pair, or true if it /// used by the given expression, within the context of analyzing the
/// is. The stride must be a loop invariant expression, but the start may be /// given loop.
/// a mix of loop invariant and loop variant expressions. The start cannot, static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L) {
/// however, contain an AddRec from a different loop, unless that loop is an // Anything loop-invariant is interesting.
/// outer loop of the current loop. if (!isa<SCEVUnknown>(S) && S->isLoopInvariant(L))
static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
const SCEV *&Start, const SCEV *&Stride,
ScalarEvolution *SE, DominatorTree *DT) {
const SCEV *TheAddRec = Start; // Initialize to zero.
// If the outer level is an AddExpr, the operands are all start values except
// for a nested AddRecExpr.
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
if (const SCEVAddRecExpr *AddRec =
dyn_cast<SCEVAddRecExpr>(AE->getOperand(i)))
TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
else
Start = SE->getAddExpr(Start, AE->getOperand(i));
} else if (isa<SCEVAddRecExpr>(SH)) {
TheAddRec = SH;
} else {
return false; // not analyzable.
}
// Break down TheAddRec into its component parts.
SmallVector<const SCEV *, 4> Subexprs;
CollectSubexprs(TheAddRec, Subexprs, *SE);
// Look for an addrec on the current loop among the parts.
const SCEV *AddRecStride = 0;
for (SmallVectorImpl<const SCEV *>::iterator I = Subexprs.begin(),
E = Subexprs.end(); I != E; ++I) {
const SCEV *S = *I;
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
if (AR->getLoop() == L) {
*I = AR->getStart();
AddRecStride = AR->getStepRecurrence(*SE);
break;
}
}
if (!AddRecStride)
return false;
// Add up everything else into a start value (which may not be
// loop-invariant).
const SCEV *AddRecStart = SE->getAddExpr(Subexprs);
// Use getSCEVAtScope to attempt to simplify other loops out of
// the picture.
AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
Start = SE->getAddExpr(Start, AddRecStart);
// If stride is an instruction, make sure it properly dominates the header.
// Otherwise we could end up with a use before def situation.
if (!isa<SCEVConstant>(AddRecStride)) {
BasicBlock *Header = L->getHeader();
if (!AddRecStride->properlyDominates(Header, DT))
return false;
DEBUG(dbgs() << "[";
WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
dbgs() << "] Variable stride: " << *AddRecStride << "\n");
}
Stride = AddRecStride;
return true;
}
/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
/// and now we need to decide whether the user should use the preinc or post-inc
/// value. If this user should use the post-inc version of the IV, return true.
///
/// Choosing wrong here can break dominance properties (if we choose to use the
/// post-inc value when we cannot) or it can end up adding extra live-ranges to
/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
/// should use the post-inc value).
static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
const Loop *L, DominatorTree *DT) {
// If the user is in the loop, use the preinc value.
if (L->contains(User)) return false;
BasicBlock *LatchBlock = L->getLoopLatch();
if (!LatchBlock)
return false;
// Ok, the user is outside of the loop. If it is dominated by the latch
// block, use the post-inc value.
if (DT->dominates(LatchBlock, User->getParent()))
return true; return true;
// There is one case we have to be careful of: PHI nodes. These little guys // An addrec is interesting if it's affine or if it has an interesting start.
// can live in blocks that are not dominated by the latch block, but (since if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// their uses occur in the predecessor block, not the block the PHI lives in) // Keep things simple. Don't touch loop-variant strides.
// should still use the post-inc value. Check for this case now. if (AR->getLoop() == L && (AR->isAffine() || !L->contains(I)))
PHINode *PN = dyn_cast<PHINode>(User); return true;
if (!PN) return false; // not a phi, not dominated by latch block. // Otherwise recurse to see if the start value is interesting.
return isInteresting(AR->getStart(), I, L);
}
// Look at all of the uses of IV by the PHI node. If any use corresponds to // An add is interesting if any of its operands is.
// a block that is not dominated by the latch block, give up and use the if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// preincremented value. for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
unsigned NumUses = 0; OI != OE; ++OI)
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (isInteresting(*OI, I, L))
if (PN->getIncomingValue(i) == IV) { return true;
++NumUses; return false;
if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) }
return false;
}
// Okay, all uses of IV by PN are in predecessor blocks that really are // Nothing else is interesting here.
// dominated by the latch block. Use the post-incremented value. return false;
return true;
} }
/// AddUsersIfInteresting - Inspect the specified instruction. If it is a /// AddUsersIfInteresting - Inspect the specified instruction. If it is a
@@ -196,16 +110,9 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
const SCEV *ISE = SE->getSCEV(I); const SCEV *ISE = SE->getSCEV(I);
if (isa<SCEVCouldNotCompute>(ISE)) return false; if (isa<SCEVCouldNotCompute>(ISE)) return false;
// Get the start and stride for this expression. // If we've come to an uninteresting expression, stop the traversal and
Loop *UseLoop = LI->getLoopFor(I->getParent()); // call this a user.
const SCEV *Start = SE->getIntegerSCEV(0, ISE->getType()); if (!isInteresting(ISE, I, L))
const SCEV *Stride = Start;
if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT))
return false; // Non-reducible symbolic expression, bail out.
// Keep things simple. Don't touch loop-variant strides.
if (!Stride->isLoopInvariant(L) && L->contains(I))
return false; return false;
SmallPtrSet<Instruction *, 4> UniqueUsers; SmallPtrSet<Instruction *, 4> UniqueUsers;
@@ -241,27 +148,24 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
} }
if (AddUserToIVUsers) { if (AddUserToIVUsers) {
// Okay, we found a user that we cannot reduce. Analyze the instruction // Okay, we found a user that we cannot reduce.
// and decide what to do with it. If we are a use inside of the loop, use IVUses.push_back(new IVStrideUse(this, ISE, User, I));
// the value before incrementation, otherwise use it after incrementation. IVStrideUse &NewUse = IVUses.back();
if (IVUseShouldUsePostIncValue(User, I, L, DT)) { // Transform the expression into a normalized form.
// The value used will be incremented by the stride more than we are NewUse.Expr =
// expecting, so subtract this off. TransformForPostIncUse(NormalizeAutodetect, NewUse.Expr,
const SCEV *NewStart = SE->getMinusSCEV(Start, Stride); User, I,
IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I)); NewUse.PostIncLoops,
IVUses.back().setIsUseOfPostIncrementedValue(true); *SE, *DT);
DEBUG(dbgs() << " USING POSTINC SCEV, START=" << *NewStart<< "\n"); DEBUG(dbgs() << " NORMALIZED TO: " << *NewUse.Expr << '\n');
} else {
IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I));
}
} }
} }
return true; return true;
} }
IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset, IVStrideUse &IVUsers::AddUser(const SCEV *Expr,
Instruction *User, Value *Operand) { Instruction *User, Value *Operand) {
IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand)); IVUses.push_back(new IVStrideUse(this, Expr, User, Operand));
return IVUses.back(); return IVUses.back();
} }
@@ -295,30 +199,10 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
/// getReplacementExpr - Return a SCEV expression which computes the /// getReplacementExpr - Return a SCEV expression which computes the
/// value of the OperandValToReplace of the given IVStrideUse. /// value of the OperandValToReplace of the given IVStrideUse.
const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const { const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
// Start with zero. PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(U.PostIncLoops);
const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); return TransformForPostIncUse(Denormalize, U.getExpr(),
// Create the basic add recurrence. U.getUser(), U.getOperandValToReplace(),
RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); Loops, *SE, *DT);
// Add the offset in a separate step, because it may be loop-variant.
RetVal = SE->getAddExpr(RetVal, U.getOffset());
// For uses of post-incremented values, add an extra stride to compute
// the actual replacement value.
if (U.isUseOfPostIncrementedValue())
RetVal = SE->getAddExpr(RetVal, U.getStride());
return RetVal;
}
/// getCanonicalExpr - Return a SCEV expression which computes the
/// value of the SCEV of the given IVStrideUse, ignoring the
/// isUseOfPostIncrementedValue flag.
const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const {
// Start with zero.
const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
// Create the basic add recurrence.
RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
// Add the offset in a separate step, because it may be loop-variant.
RetVal = SE->getAddExpr(RetVal, U.getOffset());
return RetVal;
} }
void IVUsers::print(raw_ostream &OS, const Module *M) const { void IVUsers::print(raw_ostream &OS, const Module *M) const {
@@ -339,8 +223,13 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
WriteAsOperand(OS, UI->getOperandValToReplace(), false); WriteAsOperand(OS, UI->getOperandValToReplace(), false);
OS << " = " OS << " = "
<< *getReplacementExpr(*UI); << *getReplacementExpr(*UI);
if (UI->isUseOfPostIncrementedValue()) for (PostIncLoopSet::const_iterator
OS << " (post-inc)"; I = UI->PostIncLoops.begin(),
E = UI->PostIncLoops.end(); I != E; ++I) {
OS << " (post-inc with loop ";
WriteAsOperand(OS, (*I)->getHeader(), false);
OS << ")";
}
OS << " in "; OS << " in ";
UI->getUser()->print(OS, &Annotator); UI->getUser()->print(OS, &Annotator);
OS << '\n'; OS << '\n';
@@ -356,6 +245,39 @@ void IVUsers::releaseMemory() {
IVUses.clear(); IVUses.clear();
} }
/// findAddRecForLoop - Search the expression S for an addrec belonging to
/// loop L. Addrecs on other loops are searched through their start value and
/// adds are searched operand by operand, in order. Returns the first matching
/// addrec found, or null if there is none.
static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
  // Walk down the chain of addrec start values until we either hit an addrec
  // on the requested loop or run out of addrecs.
  while (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(S)) {
    if (Rec->getLoop() == L)
      return Rec;
    S = Rec->getStart();
  }

  // Only adds are searched beyond this point.
  const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(S);
  if (!Sum)
    return 0;

  // Return the first hit among the add's operands.
  for (SCEVAddExpr::op_iterator OpI = Sum->op_begin(), OpE = Sum->op_end();
       OpI != OpE; ++OpI) {
    const SCEVAddRecExpr *Found = findAddRecForLoop(*OpI, L);
    if (Found)
      return Found;
  }
  return 0;
}
/// getStride - Return the step of the addrec for loop L found within this
/// use's expression, or null if the expression contains no such addrec.
const SCEV *IVStrideUse::getStride(const Loop *L) const {
  const SCEVAddRecExpr *Rec = findAddRecForLoop(getExpr(), L);
  if (!Rec)
    return 0;
  return Rec->getStepRecurrence(*Parent->SE);
}
void IVStrideUse::transformToPostInc(const Loop *L) {
PostIncLoopSet Loops;
Loops.insert(L);
Expr = TransformForPostIncUse(Normalize, Expr,
getUser(), getOperandValToReplace(),
Loops, *Parent->SE, *Parent->DT);
PostIncLoops.insert(L);
}
void IVStrideUse::deleted() { void IVStrideUse::deleted() {
// Remove this user from the list. // Remove this user from the list.
Parent->IVUses.erase(this); Parent->IVUses.erase(this);

View File

@@ -966,9 +966,12 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Determine a normalized form of this expression, which is the expression // Determine a normalized form of this expression, which is the expression
// before any post-inc adjustment is made. // before any post-inc adjustment is made.
const SCEVAddRecExpr *Normalized = S; const SCEVAddRecExpr *Normalized = S;
if (L == PostIncLoop) { if (PostIncLoops.count(L)) {
const SCEV *Step = S->getStepRecurrence(SE); PostIncLoopSet Loops;
Normalized = cast<SCEVAddRecExpr>(SE.getMinusSCEV(S, Step)); Loops.insert(L);
Normalized =
cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
Loops, SE, *SE.DT));
} }
// Strip off any non-loop-dominating component from the addrec start. // Strip off any non-loop-dominating component from the addrec start.
@@ -1002,7 +1005,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Accommodate post-inc mode, if necessary. // Accommodate post-inc mode, if necessary.
Value *Result; Value *Result;
if (L != PostIncLoop) if (!PostIncLoops.count(L))
Result = PN; Result = PN;
else { else {
// In PostInc mode, use the post-incremented value. // In PostInc mode, use the post-incremented value.
@@ -1274,7 +1277,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
// If the SCEV is computable at this level, insert it into the header // If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted // after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop. // there) so that it is guaranteed to dominate any user inside the loop.
if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop) if (L && S->hasComputableLoopEvolution(L) && !PostIncLoops.count(L))
InsertPt = L->getHeader()->getFirstNonPHI(); InsertPt = L->getHeader()->getFirstNonPHI();
while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
@@ -1296,7 +1299,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
Value *V = visit(S); Value *V = visit(S);
// Remember the expanded value for this SCEV at this location. // Remember the expanded value for this SCEV at this location.
if (!PostIncLoop) if (PostIncLoops.empty())
InsertedExpressions[std::make_pair(S, InsertPt)] = V; InsertedExpressions[std::make_pair(S, InsertPt)] = V;
restoreInsertPoint(SaveInsertBB, SaveInsertPt); restoreInsertPoint(SaveInsertBB, SaveInsertPt);
@@ -1304,7 +1307,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
} }
void SCEVExpander::rememberInstruction(Value *I) { void SCEVExpander::rememberInstruction(Value *I) {
if (!PostIncLoop) if (PostIncLoops.empty())
InsertedValues.insert(I); InsertedValues.insert(I);
// If we just claimed an existing instruction and that instruction had // If we just claimed an existing instruction and that instruction had

View File

@@ -0,0 +1,150 @@
//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities for working with "normalized" expressions.
// See the comments at the top of ScalarEvolutionNormalization.h for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
using namespace llvm;
/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
/// and now we need to decide whether the user should use the preinc or post-inc
/// value.  If this user should use the post-inc version of the IV, return true.
///
/// Choosing wrong here can break dominance properties (if we choose to use the
/// post-inc value when we cannot) or it can end up adding extra live-ranges to
/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
/// should use the post-inc value).
///
/// \param User The instruction that uses the IV value.
/// \param IV   The IV value being used; only compared against PHI operands.
/// \param L    The loop the IV belongs to.
/// \param DT   Dominator tree used for the latch-dominance queries.
/// \returns true if User should see the post-incremented value.
static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
                                       const Loop *L, DominatorTree *DT) {
  // If the user is in the loop, use the preinc value.
  if (L->contains(User)) return false;

  // Without a unique latch block there is no single point to test
  // dominance against, so conservatively use the preinc value.
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock)
    return false;

  // Ok, the user is outside of the loop.  If it is dominated by the latch
  // block, use the post-inc value.
  if (DT->dominates(LatchBlock, User->getParent()))
    return true;

  // There is one case we have to be careful of: PHI nodes.  These little guys
  // can live in blocks that are not dominated by the latch block, but (since
  // their uses occur in the predecessor block, not the block the PHI lives in)
  // should still use the post-inc value.  Check for this case now.
  PHINode *PN = dyn_cast<PHINode>(User);
  if (!PN) return false;  // not a phi, not dominated by latch block.

  // Look at all of the uses of IV by the PHI node.  If any use corresponds to
  // a block that is not dominated by the latch block, give up and use the
  // preincremented value.
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
    if (PN->getIncomingValue(i) == IV &&
        !DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
      return false;

  // Okay, all uses of IV by PN are in predecessor blocks that really are
  // dominated by the latch block.  Use the post-incremented value.
  return true;
}
/// TransformForPostIncUse - Recursively rebuild S, adjusting each addrec by
/// its step as requested by Kind: Normalize and NormalizeAutodetect subtract
/// the step, Denormalize adds it back. Normalize and Denormalize act on the
/// addrecs whose loop is in Loops; NormalizeAutodetect instead decides per
/// addrec from User and OperandValToReplace, and inserts the chosen loops
/// into Loops.
const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
                                         const SCEV *S,
                                         Instruction *User,
                                         Value *OperandValToReplace,
                                         PostIncLoopSet &Loops,
                                         ScalarEvolution &SE,
                                         DominatorTree &DT) {
  // Constants and unknowns have no operands to adjust; return them as-is.
  if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
    return S;

  // Casts: transform the operand and re-apply the cast only if it changed.
  if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
    const SCEV *OldOp = Cast->getOperand();
    const SCEV *NewOp = TransformForPostIncUse(Kind, OldOp, User,
                                               OperandValToReplace,
                                               Loops, SE, DT);
    if (NewOp == OldOp)
      return S;
    switch (S->getSCEVType()) {
    case scZeroExtend: return SE.getZeroExtendExpr(NewOp, S->getType());
    case scSignExtend: return SE.getSignExtendExpr(NewOp, S->getType());
    case scTruncate:   return SE.getTruncateExpr(NewOp, S->getType());
    default: llvm_unreachable("Unexpected SCEVCastExpr kind!");
    }
    return S;
  }

  // N-ary expressions, which includes addrecs.
  if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
    // Transform every operand first, noting whether any of them changed.
    SmallVector<const SCEV *, 8> NewOps;
    bool AnyChanged = false;
    for (SCEVNAryExpr::op_iterator OpI = NAry->op_begin(),
         OpE = NAry->op_end(); OpI != OpE; ++OpI) {
      const SCEV *OldOp = *OpI;
      const SCEV *NewOp = TransformForPostIncUse(Kind, OldOp, User,
                                                 OperandValToReplace,
                                                 Loops, SE, DT);
      if (NewOp != OldOp)
        AnyChanged = true;
      NewOps.push_back(NewOp);
    }

    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
    if (!AR) {
      // A plain add/mul/smax/umax: rebuild only when an operand changed.
      if (!AnyChanged)
        return S;
      switch (S->getSCEVType()) {
      case scAddExpr:  return SE.getAddExpr(NewOps);
      case scMulExpr:  return SE.getMulExpr(NewOps);
      case scSMaxExpr: return SE.getSMaxExpr(NewOps);
      case scUMaxExpr: return SE.getUMaxExpr(NewOps);
      default: llvm_unreachable("Unexpected SCEVNAryExpr kind!");
      }
      return S;
    }

    // An addrec. This is the interesting part: rebuild it from the
    // transformed operands, then apply the per-loop step adjustment.
    const Loop *L = AR->getLoop();
    const SCEV *Result = SE.getAddRecExpr(NewOps, L);
    if (Kind == NormalizeAutodetect) {
      // Decide from the user whether this loop is a post-inc loop, and
      // record it in the set if so.
      Instruction *OI = dyn_cast<Instruction>(OperandValToReplace);
      if (OI && IVUseShouldUsePostIncValue(User, OI, L, &DT)) {
        Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
        Loops.insert(L);
      }
    } else if (Kind == Normalize) {
      // Subtract the step for loops already known to be post-inc.
      if (Loops.count(L))
        Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
    } else if (Kind == Denormalize) {
      // Add the (itself denormalized) step back for post-inc loops.
      if (Loops.count(L)) {
        const SCEV *TransformedStep =
          TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
                                 User, OperandValToReplace, Loops, SE, DT);
        Result = SE.getAddExpr(Result, TransformedStep);
      }
    } else {
      llvm_unreachable("Unexpected transform name!");
    }
    return Result;
  }

  // Unsigned division: transform both sides, rebuild if either changed.
  if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(S)) {
    const SCEV *OldLHS = Div->getLHS();
    const SCEV *OldRHS = Div->getRHS();
    const SCEV *NewLHS = TransformForPostIncUse(Kind, OldLHS, User,
                                                OperandValToReplace,
                                                Loops, SE, DT);
    const SCEV *NewRHS = TransformForPostIncUse(Kind, OldRHS, User,
                                                OperandValToReplace,
                                                Loops, SE, DT);
    if (NewLHS == OldLHS && NewRHS == OldRHS)
      return S;
    return SE.getUDivExpr(NewLHS, NewRHS);
  }

  llvm_unreachable("Unexpected SCEV kind!");
  return 0;
}

View File

@@ -454,6 +454,46 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
return Changed; return Changed;
} }
// FIXME: It is an extremely bad idea to indvar substitute anything more
// complex than affine induction variables. Doing so will put expensive
// polynomial evaluations inside of the loop, and the str reduction pass
// currently can only reduce affine polynomials. For now just disable
// indvar subst on anything more complex than an affine addrec, unless
// it can be expanded to a trivial value.
static bool isSafe(const SCEV *S, const Loop *L) {
// Loop-invariant values are safe.
if (S->isLoopInvariant(L)) return true;
// Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
// to transform them into efficient code.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
return AR->isAffine();
// An add is safe it all its operands are safe.
if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
E = Commutative->op_end(); I != E; ++I)
if (!isSafe(*I, L)) return false;
return true;
}
// A cast is safe if its operand is.
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
return isSafe(C->getOperand(), L);
// A udiv is safe if its operands are.
if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
return isSafe(UD->getLHS(), L) &&
isSafe(UD->getRHS(), L);
// SCEVUnknown is always safe.
if (isa<SCEVUnknown>(S))
return true;
// Nothing else is safe.
return false;
}
void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
SmallVector<WeakVH, 16> DeadInsts; SmallVector<WeakVH, 16> DeadInsts;
@@ -465,7 +505,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// the need for the code evaluation methods to insert induction variables // the need for the code evaluation methods to insert induction variables
// of different sizes. // of different sizes.
for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
const SCEV *Stride = UI->getStride();
Value *Op = UI->getOperandValToReplace(); Value *Op = UI->getOperandValToReplace();
const Type *UseTy = Op->getType(); const Type *UseTy = Op->getType();
Instruction *User = UI->getUser(); Instruction *User = UI->getUser();
@@ -486,7 +525,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// currently can only reduce affine polynomials. For now just disable // currently can only reduce affine polynomials. For now just disable
// indvar subst on anything more complex than an affine addrec, unless // indvar subst on anything more complex than an affine addrec, unless
// it can be expanded to a trivial value. // it can be expanded to a trivial value.
if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) if (!isSafe(AR, L))
continue; continue;
// Determine the insertion point for this user. By default, insert // Determine the insertion point for this user. By default, insert

View File

@@ -781,10 +781,10 @@ struct LSRFixup {
/// will be replaced. /// will be replaced.
Value *OperandValToReplace; Value *OperandValToReplace;
/// PostIncLoop - If this user is to use the post-incremented value of an /// PostIncLoops - If this user is to use the post-incremented value of an
/// induction variable, this variable is non-null and holds the loop /// induction variable, this variable is non-null and holds the loop
/// associated with the induction variable. /// associated with the induction variable.
const Loop *PostIncLoop; PostIncLoopSet PostIncLoops;
/// LUIdx - The index of the LSRUse describing the expression which /// LUIdx - The index of the LSRUse describing the expression which
/// this fixup needs, minus an offset (below). /// this fixup needs, minus an offset (below).
@@ -795,6 +795,8 @@ struct LSRFixup {
/// offsets, for example in an unrolled loop. /// offsets, for example in an unrolled loop.
int64_t Offset; int64_t Offset;
bool isUseFullyOutsideLoop(const Loop *L) const;
LSRFixup(); LSRFixup();
void print(raw_ostream &OS) const; void print(raw_ostream &OS) const;
@@ -804,9 +806,24 @@ struct LSRFixup {
} }
LSRFixup::LSRFixup() LSRFixup::LSRFixup()
: UserInst(0), OperandValToReplace(0), PostIncLoop(0), : UserInst(0), OperandValToReplace(0),
LUIdx(~size_t(0)), Offset(0) {} LUIdx(~size_t(0)), Offset(0) {}
/// isUseFullyOutsideLoop - Test whether this fixup always uses its
/// value outside of the given loop.
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
// PHI nodes use their value in their incoming blocks.
if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == OperandValToReplace &&
L->contains(PN->getIncomingBlock(i)))
return false;
return true;
}
return !L->contains(UserInst);
}
void LSRFixup::print(raw_ostream &OS) const { void LSRFixup::print(raw_ostream &OS) const {
OS << "UserInst="; OS << "UserInst=";
// Store is common and interesting enough to be worth special-casing. // Store is common and interesting enough to be worth special-casing.
@@ -821,9 +838,10 @@ void LSRFixup::print(raw_ostream &OS) const {
OS << ", OperandValToReplace="; OS << ", OperandValToReplace=";
WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false); WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
if (PostIncLoop) { for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
E = PostIncLoops.end(); I != E; ++I) {
OS << ", PostIncLoop="; OS << ", PostIncLoop=";
WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false); WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
} }
if (LUIdx != ~size_t(0)) if (LUIdx != ~size_t(0))
@@ -1545,8 +1563,9 @@ LSRInstance::OptimizeLoopTermCond() {
!DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
// Conservatively assume there may be reuse if the quotient of their // Conservatively assume there may be reuse if the quotient of their
// strides could be a legal scale. // strides could be a legal scale.
const SCEV *A = CondUse->getStride(); const SCEV *A = CondUse->getStride(L);
const SCEV *B = UI->getStride(); const SCEV *B = UI->getStride(L);
if (!A || !B) continue;
if (SE.getTypeSizeInBits(A->getType()) != if (SE.getTypeSizeInBits(A->getType()) !=
SE.getTypeSizeInBits(B->getType())) { SE.getTypeSizeInBits(B->getType())) {
if (SE.getTypeSizeInBits(A->getType()) > if (SE.getTypeSizeInBits(A->getType()) >
@@ -1598,7 +1617,7 @@ LSRInstance::OptimizeLoopTermCond() {
ExitingBlock->getInstList().insert(TermBr, Cond); ExitingBlock->getInstList().insert(TermBr, Cond);
// Clone the IVUse, as the old use still exists! // Clone the IVUse, as the old use still exists!
CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(), CondUse = &IU.AddUser(CondUse->getExpr(),
Cond, CondUse->getOperandValToReplace()); Cond, CondUse->getOperandValToReplace());
TermBr->replaceUsesOfWith(OldCond, Cond); TermBr->replaceUsesOfWith(OldCond, Cond);
} }
@@ -1607,9 +1626,7 @@ LSRInstance::OptimizeLoopTermCond() {
// If we get to here, we know that we can transform the setcc instruction to // If we get to here, we know that we can transform the setcc instruction to
// use the post-incremented version of the IV, allowing us to coalesce the // use the post-incremented version of the IV, allowing us to coalesce the
// live ranges for the IV correctly. // live ranges for the IV correctly.
CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), CondUse->transformToPostInc(L);
CondUse->getStride()));
CondUse->setIsUseOfPostIncrementedValue(true);
Changed = true; Changed = true;
PostIncs.insert(Cond); PostIncs.insert(Cond);
@@ -1717,19 +1734,24 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
SmallSetVector<const SCEV *, 4> Strides; SmallSetVector<const SCEV *, 4> Strides;
// Collect interesting types and strides. // Collect interesting types and strides.
SmallVector<const SCEV *, 4> Worklist;
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
const SCEV *Stride = UI->getStride(); const SCEV *Expr = UI->getExpr();
// Collect interesting types. // Collect interesting types.
Types.insert(SE.getEffectiveSCEVType(Stride->getType())); Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
// Add the stride for this loop. // Add strides for mentioned loops.
Strides.insert(Stride); Worklist.push_back(Expr);
do {
// Add strides for other mentioned loops. const SCEV *S = Worklist.pop_back_val();
for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset()); if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart())) Strides.insert(AR->getStepRecurrence(SE));
Strides.insert(AR->getStepRecurrence(SE)); Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end());
}
} while (!Worklist.empty());
} }
// Compute interesting factors from the set of interesting strides. // Compute interesting factors from the set of interesting strides.
@@ -1776,8 +1798,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LSRFixup &LF = getNewFixup(); LSRFixup &LF = getNewFixup();
LF.UserInst = UI->getUser(); LF.UserInst = UI->getUser();
LF.OperandValToReplace = UI->getOperandValToReplace(); LF.OperandValToReplace = UI->getOperandValToReplace();
if (UI->isUseOfPostIncrementedValue()) LF.PostIncLoops = UI->getPostIncLoops();
LF.PostIncLoop = L;
LSRUse::KindType Kind = LSRUse::Basic; LSRUse::KindType Kind = LSRUse::Basic;
const Type *AccessTy = 0; const Type *AccessTy = 0;
@@ -1786,7 +1807,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
AccessTy = getAccessType(LF.UserInst); AccessTy = getAccessType(LF.UserInst);
} }
const SCEV *S = IU.getCanonicalExpr(*UI); const SCEV *S = UI->getExpr();
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
// (N - i == 0), and this allows (N - i) to be the expression that we work // (N - i == 0), and this allows (N - i) to be the expression that we work
@@ -1824,7 +1845,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.LUIdx = P.first; LF.LUIdx = P.first;
LF.Offset = P.second; LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx]; LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst); LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
// If this is the first use of this LSRUse, give it a formula. // If this is the first use of this LSRUse, give it a formula.
if (LU.Formulae.empty()) { if (LU.Formulae.empty()) {
@@ -1936,7 +1957,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LF.LUIdx = P.first; LF.LUIdx = P.first;
LF.Offset = P.second; LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx]; LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst); LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
InsertSupplementalFormula(U, LU, LF.LUIdx); InsertSupplementalFormula(U, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), Uses.size() - 1); CountRegisters(LU.Formulae.back(), Uses.size() - 1);
break; break;
@@ -2783,8 +2804,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
SmallVectorImpl<WeakVH> &DeadInsts) const { SmallVectorImpl<WeakVH> &DeadInsts) const {
const LSRUse &LU = Uses[LF.LUIdx]; const LSRUse &LU = Uses[LF.LUIdx];
// Then, collect some instructions which we will remain dominated by when // Then, collect some instructions which must be dominated by the
// expanding the replacement. These must be dominated by any operands that // expanding replacement. These must be dominated by any operands that
// will be required in the expansion. // will be required in the expansion.
SmallVector<Instruction *, 4> Inputs; SmallVector<Instruction *, 4> Inputs;
if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace)) if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
@@ -2793,8 +2814,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (Instruction *I = if (Instruction *I =
dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1))) dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
Inputs.push_back(I); Inputs.push_back(I);
if (LF.PostIncLoop) { if (LF.PostIncLoops.count(L)) {
if (!L->contains(LF.UserInst)) if (LF.isUseFullyOutsideLoop(L))
Inputs.push_back(L->getLoopLatch()->getTerminator()); Inputs.push_back(L->getLoopLatch()->getTerminator());
else else
Inputs.push_back(IVIncInsertPos); Inputs.push_back(IVIncInsertPos);
@@ -2831,7 +2852,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Inform the Rewriter if we have a post-increment use, so that it can // Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion. // perform an advantageous expansion.
Rewriter.setPostInc(LF.PostIncLoop); Rewriter.setPostInc(LF.PostIncLoops);
// This is the type that the user actually needs. // This is the type that the user actually needs.
const Type *OpTy = LF.OperandValToReplace->getType(); const Type *OpTy = LF.OperandValToReplace->getType();
@@ -2855,24 +2876,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
const SCEV *Reg = *I; const SCEV *Reg = *I;
assert(!Reg->isZero() && "Zero allocated in a base register!"); assert(!Reg->isZero() && "Zero allocated in a base register!");
// If we're expanding for a post-inc user for the add-rec's loop, make the // If we're expanding for a post-inc user, make the post-inc adjustment.
// post-inc adjustment. PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
const SCEV *Start = Reg; Reg = TransformForPostIncUse(Denormalize, Reg,
while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) { LF.UserInst, LF.OperandValToReplace,
if (AR->getLoop() == LF.PostIncLoop) { Loops, SE, DT);
Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
// If the user is inside the loop, insert the code after the increment
// so that it is dominated by its operand. If the original insert point
// was already dominated by the increment, keep it, because there may
// be loop-variant operands that need to be respected also.
if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) {
IP = IVIncInsertPos;
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
}
break;
}
Start = AR->getStart();
}
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
} }
@@ -2889,11 +2897,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (F.AM.Scale != 0) { if (F.AM.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg; const SCEV *ScaledS = F.ScaledReg;
// If we're expanding for a post-inc user for the add-rec's loop, make the // If we're expanding for a post-inc user, make the post-inc adjustment.
// post-inc adjustment. PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS)) ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
if (AR->getLoop() == LF.PostIncLoop) LF.UserInst, LF.OperandValToReplace,
ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE)); Loops, SE, DT);
if (LU.Kind == LSRUse::ICmpZero) { if (LU.Kind == LSRUse::ICmpZero) {
// An interesting way of "folding" with an icmp is to use a negated // An interesting way of "folding" with an icmp is to use a negated
@@ -2954,7 +2962,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP); Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
// We're done expanding now, so reset the rewriter. // We're done expanding now, so reset the rewriter.
Rewriter.setPostInc(0); Rewriter.clearPostInc();
// An ICmpZero Formula represents an ICmp which we're handling as a // An ICmpZero Formula represents an ICmp which we're handling as a
// comparison against zero. Now that we've expanded an expression for that // comparison against zero. Now that we've expanded an expression for that

View File

@@ -0,0 +1,277 @@
; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s
; rdar://7236213
; CodeGen shouldn't require any lea instructions inside the marked loop.
; It should properly set up post-increment uses and do coalescing for
; the induction variables.
; CHECK: # Start
; CHECK-NOT: lea
; CHECK: # Stop
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
define void @foo(float* %I, i64 %IS, float* nocapture %Start, float* nocapture %Step, float* %O, i64 %OS, i64 %N) nounwind {
entry:
%times4 = alloca float, align 4 ; <float*> [#uses=3]
%timesN = alloca float, align 4 ; <float*> [#uses=2]
%0 = load float* %Step, align 4 ; <float> [#uses=8]
%1 = ptrtoint float* %I to i64 ; <i64> [#uses=1]
%2 = ptrtoint float* %O to i64 ; <i64> [#uses=1]
%tmp = xor i64 %2, %1 ; <i64> [#uses=1]
%tmp16 = and i64 %tmp, 15 ; <i64> [#uses=1]
%3 = icmp eq i64 %tmp16, 0 ; <i1> [#uses=1]
%4 = trunc i64 %IS to i32 ; <i32> [#uses=1]
%5 = xor i32 %4, 1 ; <i32> [#uses=1]
%6 = trunc i64 %OS to i32 ; <i32> [#uses=1]
%7 = xor i32 %6, 1 ; <i32> [#uses=1]
%8 = or i32 %7, %5 ; <i32> [#uses=1]
%9 = icmp eq i32 %8, 0 ; <i1> [#uses=1]
br i1 %9, label %bb, label %return
bb: ; preds = %entry
%10 = load float* %Start, align 4 ; <float> [#uses=1]
br label %bb2
bb1: ; preds = %bb3
%11 = load float* %I_addr.0, align 4 ; <float> [#uses=1]
%12 = fmul float %11, %x.0 ; <float> [#uses=1]
store float %12, float* %O_addr.0, align 4
%13 = fadd float %x.0, %0 ; <float> [#uses=1]
%indvar.next53 = add i64 %14, 1 ; <i64> [#uses=1]
br label %bb2
bb2: ; preds = %bb1, %bb
%14 = phi i64 [ %indvar.next53, %bb1 ], [ 0, %bb ] ; <i64> [#uses=21]
%x.0 = phi float [ %13, %bb1 ], [ %10, %bb ] ; <float> [#uses=6]
%N_addr.0 = sub i64 %N, %14 ; <i64> [#uses=4]
%O_addr.0 = getelementptr float* %O, i64 %14 ; <float*> [#uses=4]
%I_addr.0 = getelementptr float* %I, i64 %14 ; <float*> [#uses=3]
%15 = icmp slt i64 %N_addr.0, 1 ; <i1> [#uses=1]
br i1 %15, label %bb4, label %bb3
bb3: ; preds = %bb2
%16 = ptrtoint float* %O_addr.0 to i64 ; <i64> [#uses=1]
%17 = and i64 %16, 15 ; <i64> [#uses=1]
%18 = icmp eq i64 %17, 0 ; <i1> [#uses=1]
br i1 %18, label %bb4, label %bb1
bb4: ; preds = %bb3, %bb2
%19 = fmul float %0, 4.000000e+00 ; <float> [#uses=1]
store float %19, float* %times4, align 4
%20 = fmul float %0, 1.600000e+01 ; <float> [#uses=1]
store float %20, float* %timesN, align 4
%21 = fmul float %0, 0.000000e+00 ; <float> [#uses=1]
%22 = fadd float %21, %x.0 ; <float> [#uses=1]
%23 = fadd float %x.0, %0 ; <float> [#uses=1]
%24 = fmul float %0, 2.000000e+00 ; <float> [#uses=1]
%25 = fadd float %24, %x.0 ; <float> [#uses=1]
%26 = fmul float %0, 3.000000e+00 ; <float> [#uses=1]
%27 = fadd float %26, %x.0 ; <float> [#uses=1]
%28 = insertelement <4 x float> undef, float %22, i32 0 ; <<4 x float>> [#uses=1]
%29 = insertelement <4 x float> %28, float %23, i32 1 ; <<4 x float>> [#uses=1]
%30 = insertelement <4 x float> %29, float %25, i32 2 ; <<4 x float>> [#uses=1]
%31 = insertelement <4 x float> %30, float %27, i32 3 ; <<4 x float>> [#uses=5]
%asmtmp.i = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %times4) nounwind ; <<4 x float>> [#uses=3]
%32 = fadd <4 x float> %31, %asmtmp.i ; <<4 x float>> [#uses=3]
%33 = fadd <4 x float> %32, %asmtmp.i ; <<4 x float>> [#uses=3]
%34 = fadd <4 x float> %33, %asmtmp.i ; <<4 x float>> [#uses=2]
%asmtmp.i18 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %timesN) nounwind ; <<4 x float>> [#uses=8]
%35 = icmp sgt i64 %N_addr.0, 15 ; <i1> [#uses=2]
br i1 %3, label %bb6.preheader, label %bb8
bb6.preheader: ; preds = %bb4
br i1 %35, label %bb.nph43, label %bb7
bb.nph43: ; preds = %bb6.preheader
%tmp108 = add i64 %14, 16 ; <i64> [#uses=1]
%tmp111 = add i64 %14, 4 ; <i64> [#uses=1]
%tmp115 = add i64 %14, 8 ; <i64> [#uses=1]
%tmp119 = add i64 %14, 12 ; <i64> [#uses=1]
%tmp134 = add i64 %N, -16 ; <i64> [#uses=1]
%tmp135 = sub i64 %tmp134, %14 ; <i64> [#uses=1]
call void asm sideeffect "# Start.", "~{dirflag},~{fpsr},~{flags}"() nounwind
br label %bb5
bb5: ; preds = %bb.nph43, %bb5
%indvar102 = phi i64 [ 0, %bb.nph43 ], [ %indvar.next103, %bb5 ] ; <i64> [#uses=3]
%vX3.041 = phi <4 x float> [ %34, %bb.nph43 ], [ %45, %bb5 ] ; <<4 x float>> [#uses=2]
%vX0.039 = phi <4 x float> [ %31, %bb.nph43 ], [ %41, %bb5 ] ; <<4 x float>> [#uses=2]
%vX2.037 = phi <4 x float> [ %33, %bb.nph43 ], [ %46, %bb5 ] ; <<4 x float>> [#uses=2]
%vX1.036 = phi <4 x float> [ %32, %bb.nph43 ], [ %47, %bb5 ] ; <<4 x float>> [#uses=2]
%tmp104 = shl i64 %indvar102, 4 ; <i64> [#uses=5]
%tmp105 = add i64 %14, %tmp104 ; <i64> [#uses=2]
%scevgep106 = getelementptr float* %I, i64 %tmp105 ; <float*> [#uses=1]
%scevgep106107 = bitcast float* %scevgep106 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp109 = add i64 %tmp108, %tmp104 ; <i64> [#uses=2]
%tmp112 = add i64 %tmp111, %tmp104 ; <i64> [#uses=2]
%scevgep113 = getelementptr float* %I, i64 %tmp112 ; <float*> [#uses=1]
%scevgep113114 = bitcast float* %scevgep113 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp116 = add i64 %tmp115, %tmp104 ; <i64> [#uses=2]
%scevgep117 = getelementptr float* %I, i64 %tmp116 ; <float*> [#uses=1]
%scevgep117118 = bitcast float* %scevgep117 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp120 = add i64 %tmp119, %tmp104 ; <i64> [#uses=2]
%scevgep121 = getelementptr float* %I, i64 %tmp120 ; <float*> [#uses=1]
%scevgep121122 = bitcast float* %scevgep121 to <4 x float>* ; <<4 x float>*> [#uses=1]
%scevgep123 = getelementptr float* %O, i64 %tmp105 ; <float*> [#uses=1]
%scevgep123124 = bitcast float* %scevgep123 to <4 x float>* ; <<4 x float>*> [#uses=1]
%scevgep126 = getelementptr float* %O, i64 %tmp112 ; <float*> [#uses=1]
%scevgep126127 = bitcast float* %scevgep126 to <4 x float>* ; <<4 x float>*> [#uses=1]
%scevgep128 = getelementptr float* %O, i64 %tmp116 ; <float*> [#uses=1]
%scevgep128129 = bitcast float* %scevgep128 to <4 x float>* ; <<4 x float>*> [#uses=1]
%scevgep130 = getelementptr float* %O, i64 %tmp120 ; <float*> [#uses=1]
%scevgep130131 = bitcast float* %scevgep130 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp132 = mul i64 %indvar102, -16 ; <i64> [#uses=1]
%tmp136 = add i64 %tmp135, %tmp132 ; <i64> [#uses=2]
%36 = load <4 x float>* %scevgep106107, align 16 ; <<4 x float>> [#uses=1]
%37 = load <4 x float>* %scevgep113114, align 16 ; <<4 x float>> [#uses=1]
%38 = load <4 x float>* %scevgep117118, align 16 ; <<4 x float>> [#uses=1]
%39 = load <4 x float>* %scevgep121122, align 16 ; <<4 x float>> [#uses=1]
%40 = fmul <4 x float> %36, %vX0.039 ; <<4 x float>> [#uses=1]
%41 = fadd <4 x float> %vX0.039, %asmtmp.i18 ; <<4 x float>> [#uses=2]
%42 = fmul <4 x float> %37, %vX1.036 ; <<4 x float>> [#uses=1]
%43 = fmul <4 x float> %38, %vX2.037 ; <<4 x float>> [#uses=1]
%44 = fmul <4 x float> %39, %vX3.041 ; <<4 x float>> [#uses=1]
store <4 x float> %40, <4 x float>* %scevgep123124, align 16
store <4 x float> %42, <4 x float>* %scevgep126127, align 16
store <4 x float> %43, <4 x float>* %scevgep128129, align 16
store <4 x float> %44, <4 x float>* %scevgep130131, align 16
%45 = fadd <4 x float> %vX3.041, %asmtmp.i18 ; <<4 x float>> [#uses=1]
%46 = fadd <4 x float> %vX2.037, %asmtmp.i18 ; <<4 x float>> [#uses=1]
%47 = fadd <4 x float> %vX1.036, %asmtmp.i18 ; <<4 x float>> [#uses=1]
%48 = icmp sgt i64 %tmp136, 15 ; <i1> [#uses=1]
%indvar.next103 = add i64 %indvar102, 1 ; <i64> [#uses=1]
br i1 %48, label %bb5, label %bb6.bb7_crit_edge
bb6.bb7_crit_edge: ; preds = %bb5
call void asm sideeffect "# Stop.", "~{dirflag},~{fpsr},~{flags}"() nounwind
%scevgep110 = getelementptr float* %I, i64 %tmp109 ; <float*> [#uses=1]
%scevgep125 = getelementptr float* %O, i64 %tmp109 ; <float*> [#uses=1]
br label %bb7
bb7: ; preds = %bb6.bb7_crit_edge, %bb6.preheader
%I_addr.1.lcssa = phi float* [ %scevgep110, %bb6.bb7_crit_edge ], [ %I_addr.0, %bb6.preheader ] ; <float*> [#uses=1]
%O_addr.1.lcssa = phi float* [ %scevgep125, %bb6.bb7_crit_edge ], [ %O_addr.0, %bb6.preheader ] ; <float*> [#uses=1]
%vX0.0.lcssa = phi <4 x float> [ %41, %bb6.bb7_crit_edge ], [ %31, %bb6.preheader ] ; <<4 x float>> [#uses=1]
%N_addr.1.lcssa = phi i64 [ %tmp136, %bb6.bb7_crit_edge ], [ %N_addr.0, %bb6.preheader ] ; <i64> [#uses=1]
%asmtmp.i17 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %times4) nounwind ; <<4 x float>> [#uses=0]
br label %bb11
bb8: ; preds = %bb4
br i1 %35, label %bb.nph, label %bb11
bb.nph: ; preds = %bb8
%I_addr.0.sum = add i64 %14, -1 ; <i64> [#uses=1]
%49 = getelementptr inbounds float* %I, i64 %I_addr.0.sum ; <float*> [#uses=1]
%50 = bitcast float* %49 to <4 x float>* ; <<4 x float>*> [#uses=1]
%51 = load <4 x float>* %50, align 16 ; <<4 x float>> [#uses=1]
%tmp54 = add i64 %14, 16 ; <i64> [#uses=1]
%tmp56 = add i64 %14, 3 ; <i64> [#uses=1]
%tmp60 = add i64 %14, 7 ; <i64> [#uses=1]
%tmp64 = add i64 %14, 11 ; <i64> [#uses=1]
%tmp68 = add i64 %14, 15 ; <i64> [#uses=1]
%tmp76 = add i64 %14, 4 ; <i64> [#uses=1]
%tmp80 = add i64 %14, 8 ; <i64> [#uses=1]
%tmp84 = add i64 %14, 12 ; <i64> [#uses=1]
%tmp90 = add i64 %N, -16 ; <i64> [#uses=1]
%tmp91 = sub i64 %tmp90, %14 ; <i64> [#uses=1]
br label %bb9
bb9: ; preds = %bb.nph, %bb9
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb9 ] ; <i64> [#uses=3]
%vX3.125 = phi <4 x float> [ %34, %bb.nph ], [ %69, %bb9 ] ; <<4 x float>> [#uses=2]
%vX0.223 = phi <4 x float> [ %31, %bb.nph ], [ %65, %bb9 ] ; <<4 x float>> [#uses=2]
%vX2.121 = phi <4 x float> [ %33, %bb.nph ], [ %70, %bb9 ] ; <<4 x float>> [#uses=2]
%vX1.120 = phi <4 x float> [ %32, %bb.nph ], [ %71, %bb9 ] ; <<4 x float>> [#uses=2]
%vI0.019 = phi <4 x float> [ %51, %bb.nph ], [ %55, %bb9 ] ; <<4 x float>> [#uses=1]
%tmp51 = shl i64 %indvar, 4 ; <i64> [#uses=9]
%tmp55 = add i64 %tmp54, %tmp51 ; <i64> [#uses=2]
%tmp57 = add i64 %tmp56, %tmp51 ; <i64> [#uses=1]
%scevgep58 = getelementptr float* %I, i64 %tmp57 ; <float*> [#uses=1]
%scevgep5859 = bitcast float* %scevgep58 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp61 = add i64 %tmp60, %tmp51 ; <i64> [#uses=1]
%scevgep62 = getelementptr float* %I, i64 %tmp61 ; <float*> [#uses=1]
%scevgep6263 = bitcast float* %scevgep62 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp65 = add i64 %tmp64, %tmp51 ; <i64> [#uses=1]
%scevgep66 = getelementptr float* %I, i64 %tmp65 ; <float*> [#uses=1]
%scevgep6667 = bitcast float* %scevgep66 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp69 = add i64 %tmp68, %tmp51 ; <i64> [#uses=1]
%scevgep70 = getelementptr float* %I, i64 %tmp69 ; <float*> [#uses=1]
%scevgep7071 = bitcast float* %scevgep70 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp72 = add i64 %14, %tmp51 ; <i64> [#uses=1]
%scevgep73 = getelementptr float* %O, i64 %tmp72 ; <float*> [#uses=1]
%scevgep7374 = bitcast float* %scevgep73 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp77 = add i64 %tmp76, %tmp51 ; <i64> [#uses=1]
%scevgep78 = getelementptr float* %O, i64 %tmp77 ; <float*> [#uses=1]
%scevgep7879 = bitcast float* %scevgep78 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp81 = add i64 %tmp80, %tmp51 ; <i64> [#uses=1]
%scevgep82 = getelementptr float* %O, i64 %tmp81 ; <float*> [#uses=1]
%scevgep8283 = bitcast float* %scevgep82 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp85 = add i64 %tmp84, %tmp51 ; <i64> [#uses=1]
%scevgep86 = getelementptr float* %O, i64 %tmp85 ; <float*> [#uses=1]
%scevgep8687 = bitcast float* %scevgep86 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp88 = mul i64 %indvar, -16 ; <i64> [#uses=1]
%tmp92 = add i64 %tmp91, %tmp88 ; <i64> [#uses=2]
%52 = load <4 x float>* %scevgep5859, align 16 ; <<4 x float>> [#uses=2]
%53 = load <4 x float>* %scevgep6263, align 16 ; <<4 x float>> [#uses=2]
%54 = load <4 x float>* %scevgep6667, align 16 ; <<4 x float>> [#uses=2]
%55 = load <4 x float>* %scevgep7071, align 16 ; <<4 x float>> [#uses=2]
%56 = shufflevector <4 x float> %vI0.019, <4 x float> %52, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
%57 = shufflevector <4 x float> %56, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
%58 = shufflevector <4 x float> %52, <4 x float> %53, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
%59 = shufflevector <4 x float> %58, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
%60 = shufflevector <4 x float> %53, <4 x float> %54, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
%61 = shufflevector <4 x float> %60, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
%62 = shufflevector <4 x float> %54, <4 x float> %55, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
%63 = shufflevector <4 x float> %62, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
%64 = fmul <4 x float> %57, %vX0.223 ; <<4 x float>> [#uses=1]
%65 = fadd <4 x float> %vX0.223, %asmtmp.i18 ; <<4 x float>> [#uses=2]
%66 = fmul <4 x float> %59, %vX1.120 ; <<4 x float>> [#uses=1]
%67 = fmul <4 x float> %61, %vX2.121 ; <<4 x float>> [#uses=1]
%68 = fmul <4 x float> %63, %vX3.125 ; <<4 x float>> [#uses=1]
store <4 x float> %64, <4 x float>* %scevgep7374, align 16
store <4 x float> %66, <4 x float>* %scevgep7879, align 16
store <4 x float> %67, <4 x float>* %scevgep8283, align 16
store <4 x float> %68, <4 x float>* %scevgep8687, align 16
%69 = fadd <4 x float> %vX3.125, %asmtmp.i18 ; <<4 x float>> [#uses=1]
%70 = fadd <4 x float> %vX2.121, %asmtmp.i18 ; <<4 x float>> [#uses=1]
%71 = fadd <4 x float> %vX1.120, %asmtmp.i18 ; <<4 x float>> [#uses=1]
%72 = icmp sgt i64 %tmp92, 15 ; <i1> [#uses=1]
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br i1 %72, label %bb9, label %bb10.bb11.loopexit_crit_edge
bb10.bb11.loopexit_crit_edge: ; preds = %bb9
%scevgep = getelementptr float* %I, i64 %tmp55 ; <float*> [#uses=1]
%scevgep75 = getelementptr float* %O, i64 %tmp55 ; <float*> [#uses=1]
br label %bb11
bb11: ; preds = %bb8, %bb10.bb11.loopexit_crit_edge, %bb7
%N_addr.2 = phi i64 [ %N_addr.1.lcssa, %bb7 ], [ %tmp92, %bb10.bb11.loopexit_crit_edge ], [ %N_addr.0, %bb8 ] ; <i64> [#uses=2]
%vX0.1 = phi <4 x float> [ %vX0.0.lcssa, %bb7 ], [ %65, %bb10.bb11.loopexit_crit_edge ], [ %31, %bb8 ] ; <<4 x float>> [#uses=1]
%O_addr.2 = phi float* [ %O_addr.1.lcssa, %bb7 ], [ %scevgep75, %bb10.bb11.loopexit_crit_edge ], [ %O_addr.0, %bb8 ] ; <float*> [#uses=1]
%I_addr.2 = phi float* [ %I_addr.1.lcssa, %bb7 ], [ %scevgep, %bb10.bb11.loopexit_crit_edge ], [ %I_addr.0, %bb8 ] ; <float*> [#uses=1]
%73 = extractelement <4 x float> %vX0.1, i32 0 ; <float> [#uses=2]
%74 = icmp sgt i64 %N_addr.2, 0 ; <i1> [#uses=1]
br i1 %74, label %bb12, label %bb14
bb12: ; preds = %bb11, %bb12
%indvar94 = phi i64 [ %indvar.next95, %bb12 ], [ 0, %bb11 ] ; <i64> [#uses=3]
%x.130 = phi float [ %77, %bb12 ], [ %73, %bb11 ] ; <float> [#uses=2]
%I_addr.433 = getelementptr float* %I_addr.2, i64 %indvar94 ; <float*> [#uses=1]
%O_addr.432 = getelementptr float* %O_addr.2, i64 %indvar94 ; <float*> [#uses=1]
%75 = load float* %I_addr.433, align 4 ; <float> [#uses=1]
%76 = fmul float %75, %x.130 ; <float> [#uses=1]
store float %76, float* %O_addr.432, align 4
%77 = fadd float %x.130, %0 ; <float> [#uses=2]
%indvar.next95 = add i64 %indvar94, 1 ; <i64> [#uses=2]
%exitcond = icmp eq i64 %indvar.next95, %N_addr.2 ; <i1> [#uses=1]
br i1 %exitcond, label %bb14, label %bb12
bb14: ; preds = %bb12, %bb11
%x.1.lcssa = phi float [ %73, %bb11 ], [ %77, %bb12 ] ; <float> [#uses=1]
store float %x.1.lcssa, float* %Start, align 4
ret void
return: ; preds = %entry
ret void
}

View File

@@ -1,4 +1,4 @@
; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc)} ; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc with loop %loop)}
; The value of %r is dependent on a polynomial iteration expression. ; The value of %r is dependent on a polynomial iteration expression.