Generalize IVUsers to track arbitrary expressions rather than expressions
explicitly split into stride-and-offset pairs. Also, add the ability to track
multiple post-increment loops on the same expression.

This refines the concept of "normalizing" SCEV expressions used for
post-increment uses, and introduces a dedicated utility routine for
normalizing and denormalizing expressions.

This fixes the expansion of expressions which are post-increment users of
more than one loop at a time. More broadly, this takes LSR another step
closer to being able to reason about more than one loop at a time.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100699 91177308-0d34-0410-b5e6-96231b3b80d8
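As a rough mental model of what "normalizing" and "denormalizing" a SCEV expression for a post-increment use means: the post-incremented value of an induction variable is one step ahead of the normalized (pre-increment) form, so normalization subtracts one step per post-increment loop and denormalization adds it back. That mirrors the old setOffset(getMinusSCEV(offset, stride)) and getAddExpr(Reg, step) code this patch replaces with transformToPostInc() and TransformForPostIncUse(). The sketch below is a minimal illustration over a toy affine recurrence, not the LLVM API; AffineRec and the two helper functions are invented for this note.

#include <cassert>
#include <set>

// Toy stand-in for an affine add recurrence {Start,+,Step} of a single loop.
struct AffineRec {
  long Start;   // value on entry to the loop (iteration 0)
  long Step;    // amount added on each trip around the loop
  int  LoopId;  // identifies the loop the recurrence belongs to
};

// Normalize: a use of the post-incremented IV of a loop in PostIncLoops sees
// Start + Step already on iteration 0, so the normalized (pre-increment) form
// is one Step behind. This plays the role of the old
// CondUse->setOffset(SE.getMinusSCEV(getOffset(), getStride())) logic that
// transformToPostInc(L) now encapsulates.
AffineRec normalizeForPostInc(AffineRec R, const std::set<int> &PostIncLoops) {
  if (PostIncLoops.count(R.LoopId))
    R.Start -= R.Step;
  return R;
}

// Denormalize: when expanding code for the post-increment use, add the step
// back, as the old Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE)) did
// for the single PostIncLoop. With a set of loops, each tracked loop gets its
// own adjustment.
AffineRec denormalizeForPostInc(AffineRec R, const std::set<int> &PostIncLoops) {
  if (PostIncLoops.count(R.LoopId))
    R.Start += R.Step;
  return R;
}

int main() {
  AffineRec IV{0, 4, /*LoopId=*/1};     // e.g. a pointer bumped by 4 each trip
  std::set<int> PostIncLoops{1};
  AffineRec N = normalizeForPostInc(IV, PostIncLoops);
  assert(N.Start == -4);                // normalized form is one step behind
  AffineRec D = denormalizeForPostInc(N, PostIncLoops);
  assert(D.Start == IV.Start);          // denormalize undoes normalize
  (void)N; (void)D;
  return 0;
}

Keeping the post-increment adjustment keyed on a set of loops, rather than a single loop pointer, is what lets one expression carry adjustments for several loops at once — the case the PostIncLoopSet plumbing in the hunks below is built for.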
@@ -454,6 +454,46 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   return Changed;
 }
 
+// FIXME: It is an extremely bad idea to indvar substitute anything more
+// complex than affine induction variables. Doing so will put expensive
+// polynomial evaluations inside of the loop, and the str reduction pass
+// currently can only reduce affine polynomials. For now just disable
+// indvar subst on anything more complex than an affine addrec, unless
+// it can be expanded to a trivial value.
+static bool isSafe(const SCEV *S, const Loop *L) {
+  // Loop-invariant values are safe.
+  if (S->isLoopInvariant(L)) return true;
+
+  // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
+  // to transform them into efficient code.
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+    return AR->isAffine();
+
+  // An add is safe if all its operands are safe.
+  if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+    for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
+         E = Commutative->op_end(); I != E; ++I)
+      if (!isSafe(*I, L)) return false;
+    return true;
+  }
+
+  // A cast is safe if its operand is.
+  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+    return isSafe(C->getOperand(), L);
+
+  // A udiv is safe if its operands are.
+  if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
+    return isSafe(UD->getLHS(), L) &&
+           isSafe(UD->getRHS(), L);
+
+  // SCEVUnknown is always safe.
+  if (isa<SCEVUnknown>(S))
+    return true;
+
+  // Nothing else is safe.
+  return false;
+}
+
 void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
   SmallVector<WeakVH, 16> DeadInsts;
 
@@ -465,7 +505,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
   // the need for the code evaluation methods to insert induction variables
   // of different sizes.
   for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
-    const SCEV *Stride = UI->getStride();
     Value *Op = UI->getOperandValToReplace();
     const Type *UseTy = Op->getType();
     Instruction *User = UI->getUser();
@@ -486,7 +525,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
     // currently can only reduce affine polynomials. For now just disable
     // indvar subst on anything more complex than an affine addrec, unless
     // it can be expanded to a trivial value.
-    if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
+    if (!isSafe(AR, L))
       continue;
 
     // Determine the insertion point for this user. By default, insert
@@ -781,10 +781,10 @@ struct LSRFixup {
   /// will be replaced.
   Value *OperandValToReplace;
 
-  /// PostIncLoop - If this user is to use the post-incremented value of an
+  /// PostIncLoops - If this user is to use the post-incremented value of an
   /// induction variable, this variable is non-null and holds the loop
   /// associated with the induction variable.
-  const Loop *PostIncLoop;
+  PostIncLoopSet PostIncLoops;
 
   /// LUIdx - The index of the LSRUse describing the expression which
   /// this fixup needs, minus an offset (below).
@@ -795,6 +795,8 @@ struct LSRFixup {
   /// offsets, for example in an unrolled loop.
   int64_t Offset;
 
+  bool isUseFullyOutsideLoop(const Loop *L) const;
+
   LSRFixup();
 
   void print(raw_ostream &OS) const;
@@ -804,9 +806,24 @@ struct LSRFixup {
 }
 
 LSRFixup::LSRFixup()
-  : UserInst(0), OperandValToReplace(0), PostIncLoop(0),
+  : UserInst(0), OperandValToReplace(0),
     LUIdx(~size_t(0)), Offset(0) {}
 
+/// isUseFullyOutsideLoop - Test whether this fixup always uses its
+/// value outside of the given loop.
+bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
+  // PHI nodes use their value in their incoming blocks.
+  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (PN->getIncomingValue(i) == OperandValToReplace &&
+          L->contains(PN->getIncomingBlock(i)))
+        return false;
+    return true;
+  }
+
+  return !L->contains(UserInst);
+}
+
 void LSRFixup::print(raw_ostream &OS) const {
   OS << "UserInst=";
   // Store is common and interesting enough to be worth special-casing.
@@ -821,9 +838,10 @@ void LSRFixup::print(raw_ostream &OS) const {
   OS << ", OperandValToReplace=";
   WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
 
-  if (PostIncLoop) {
+  for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
+       E = PostIncLoops.end(); I != E; ++I) {
     OS << ", PostIncLoop=";
-    WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false);
+    WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
   }
 
   if (LUIdx != ~size_t(0))
@@ -1545,8 +1563,9 @@ LSRInstance::OptimizeLoopTermCond() {
         !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
       // Conservatively assume there may be reuse if the quotient of their
       // strides could be a legal scale.
-      const SCEV *A = CondUse->getStride();
-      const SCEV *B = UI->getStride();
+      const SCEV *A = CondUse->getStride(L);
+      const SCEV *B = UI->getStride(L);
+      if (!A || !B) continue;
       if (SE.getTypeSizeInBits(A->getType()) !=
           SE.getTypeSizeInBits(B->getType())) {
         if (SE.getTypeSizeInBits(A->getType()) >
@@ -1598,7 +1617,7 @@ LSRInstance::OptimizeLoopTermCond() {
       ExitingBlock->getInstList().insert(TermBr, Cond);
 
       // Clone the IVUse, as the old use still exists!
-      CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(),
+      CondUse = &IU.AddUser(CondUse->getExpr(),
                             Cond, CondUse->getOperandValToReplace());
      TermBr->replaceUsesOfWith(OldCond, Cond);
     }
@@ -1607,9 +1626,7 @@ LSRInstance::OptimizeLoopTermCond() {
     // If we get to here, we know that we can transform the setcc instruction to
     // use the post-incremented version of the IV, allowing us to coalesce the
     // live ranges for the IV correctly.
-    CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(),
-                                       CondUse->getStride()));
-    CondUse->setIsUseOfPostIncrementedValue(true);
+    CondUse->transformToPostInc(L);
     Changed = true;
 
     PostIncs.insert(Cond);
@@ -1717,19 +1734,24 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
   SmallSetVector<const SCEV *, 4> Strides;
 
   // Collect interesting types and strides.
+  SmallVector<const SCEV *, 4> Worklist;
   for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
-    const SCEV *Stride = UI->getStride();
+    const SCEV *Expr = UI->getExpr();
 
     // Collect interesting types.
-    Types.insert(SE.getEffectiveSCEVType(Stride->getType()));
+    Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
 
-    // Add the stride for this loop.
-    Strides.insert(Stride);
-
-    // Add strides for other mentioned loops.
-    for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset());
-         AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart()))
-      Strides.insert(AR->getStepRecurrence(SE));
+    // Add strides for mentioned loops.
+    Worklist.push_back(Expr);
+    do {
+      const SCEV *S = Worklist.pop_back_val();
+      if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+        Strides.insert(AR->getStepRecurrence(SE));
+        Worklist.push_back(AR->getStart());
+      } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+        Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end());
+      }
+    } while (!Worklist.empty());
   }
 
   // Compute interesting factors from the set of interesting strides.
@@ -1776,8 +1798,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
     LSRFixup &LF = getNewFixup();
     LF.UserInst = UI->getUser();
    LF.OperandValToReplace = UI->getOperandValToReplace();
-    if (UI->isUseOfPostIncrementedValue())
-      LF.PostIncLoop = L;
+    LF.PostIncLoops = UI->getPostIncLoops();
 
     LSRUse::KindType Kind = LSRUse::Basic;
     const Type *AccessTy = 0;
@@ -1786,7 +1807,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
       AccessTy = getAccessType(LF.UserInst);
     }
 
-    const SCEV *S = IU.getCanonicalExpr(*UI);
+    const SCEV *S = UI->getExpr();
 
     // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
    // (N - i == 0), and this allows (N - i) to be the expression that we work
@@ -1824,7 +1845,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
     LF.LUIdx = P.first;
     LF.Offset = P.second;
     LSRUse &LU = Uses[LF.LUIdx];
-    LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst);
+    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
 
     // If this is the first use of this LSRUse, give it a formula.
     if (LU.Formulae.empty()) {
@@ -1936,7 +1957,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
         LF.LUIdx = P.first;
         LF.Offset = P.second;
         LSRUse &LU = Uses[LF.LUIdx];
-        LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst);
+        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
         InsertSupplementalFormula(U, LU, LF.LUIdx);
         CountRegisters(LU.Formulae.back(), Uses.size() - 1);
         break;
@@ -2783,8 +2804,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
                            SmallVectorImpl<WeakVH> &DeadInsts) const {
   const LSRUse &LU = Uses[LF.LUIdx];
 
-  // Then, collect some instructions which we will remain dominated by when
-  // expanding the replacement. These must be dominated by any operands that
+  // Then, collect some instructions which must be dominated by the
+  // expanding replacement. These must be dominated by any operands that
   // will be required in the expansion.
   SmallVector<Instruction *, 4> Inputs;
   if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
@@ -2793,8 +2814,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   if (Instruction *I =
       dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
     Inputs.push_back(I);
-  if (LF.PostIncLoop) {
-    if (!L->contains(LF.UserInst))
+  if (LF.PostIncLoops.count(L)) {
+    if (LF.isUseFullyOutsideLoop(L))
       Inputs.push_back(L->getLoopLatch()->getTerminator());
     else
       Inputs.push_back(IVIncInsertPos);
@@ -2831,7 +2852,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
 
   // Inform the Rewriter if we have a post-increment use, so that it can
   // perform an advantageous expansion.
-  Rewriter.setPostInc(LF.PostIncLoop);
+  Rewriter.setPostInc(LF.PostIncLoops);
 
   // This is the type that the user actually needs.
   const Type *OpTy = LF.OperandValToReplace->getType();
@@ -2855,24 +2876,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
     const SCEV *Reg = *I;
     assert(!Reg->isZero() && "Zero allocated in a base register!");
 
-    // If we're expanding for a post-inc user for the add-rec's loop, make the
-    // post-inc adjustment.
-    const SCEV *Start = Reg;
-    while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) {
-      if (AR->getLoop() == LF.PostIncLoop) {
-        Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
-        // If the user is inside the loop, insert the code after the increment
-        // so that it is dominated by its operand. If the original insert point
-        // was already dominated by the increment, keep it, because there may
-        // be loop-variant operands that need to be respected also.
-        if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) {
-          IP = IVIncInsertPos;
-          while (isa<DbgInfoIntrinsic>(IP)) ++IP;
-        }
-        break;
-      }
-      Start = AR->getStart();
-    }
+    // If we're expanding for a post-inc user, make the post-inc adjustment.
+    PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+    Reg = TransformForPostIncUse(Denormalize, Reg,
+                                 LF.UserInst, LF.OperandValToReplace,
+                                 Loops, SE, DT);
 
     Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
   }
@@ -2889,11 +2897,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   if (F.AM.Scale != 0) {
     const SCEV *ScaledS = F.ScaledReg;
 
-    // If we're expanding for a post-inc user for the add-rec's loop, make the
-    // post-inc adjustment.
-    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS))
-      if (AR->getLoop() == LF.PostIncLoop)
-        ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE));
+    // If we're expanding for a post-inc user, make the post-inc adjustment.
+    PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+    ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
+                                     LF.UserInst, LF.OperandValToReplace,
+                                     Loops, SE, DT);
 
     if (LU.Kind == LSRUse::ICmpZero) {
       // An interesting way of "folding" with an icmp is to use a negated
@@ -2954,7 +2962,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
 
   // We're done expanding now, so reset the rewriter.
-  Rewriter.setPostInc(0);
+  Rewriter.clearPostInc();
 
   // An ICmpZero Formula represents an ICmp which we're handling as a
   // comparison against zero. Now that we've expanded an expression for that
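As a closing illustration of the case the commit message calls out — an expression that is a post-increment user of more than one loop at a time — consider a loop nest like the one below. The example is invented for this note (it is not part of the patch, and it assumes a 4-byte int): inside the inner loop, the SCEV of q is roughly {{p,+,4*RowStride}<outer>,+,4}<inner>, an add recurrence in both loops at once. If LSR ends up expressing that use in terms of the post-incremented induction variables of both loops, the fixup needs one post-increment adjustment per loop, which is what PostIncLoops and TransformForPostIncUse(Denormalize, ...) make possible.

void zero2d(int *p, long Rows, long Cols, long RowStride) {
  for (long i = 0; i != Rows; ++i) {    // outer loop
    int *q = p;                         // q restarts at the current row
    for (long j = 0; j != Cols; ++j)    // inner loop
      *q++ = 0;                         // address recurs in both loops
    p += RowStride;                     // advance to the next row
  }
}

Tracking a set of loops instead of a single PostIncLoop pointer is what allows the denormalizing transform above to apply one step adjustment per loop when expanding such a use.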