Mirror of https://github.com/c64scene-ar/llvm-6502.git (synced 2025-09-24 23:28:41 +00:00)
Generalize IVUsers to track arbitrary expressions rather than expressions explicitly split into stride-and-offset pairs. Also, add the ability to track multiple post-increment loops on the same expression.

This refines the concept of "normalizing" SCEV expressions used for post-increment uses, and introduces a dedicated utility routine for normalizing and denormalizing expressions.

This fixes the expansion of expressions which are post-increment users of more than one loop at a time. More broadly, this takes LSR another step closer to being able to reason about more than one loop at a time.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100699 91177308-0d34-0410-b5e6-96231b3b80d8
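Below is a minimal standalone sketch (plain C++, not the LLVM API and not part of this patch) of the normalization idea: for a use of a recurrence {Start,+,Step} attached to a loop, the post-incremented value is the pre-increment value plus Step, so normalizing a use subtracts the step of each recurrence whose loop is in the use's post-increment set, and denormalizing adds it back before expansion. The Recurrence type, loop names, and function names are illustrative only; the real code performs this transform on SCEV expressions via TransformForPostIncUse.

// Illustrative sketch only: toy model of normalize/denormalize for post-inc uses.
#include <cassert>
#include <set>
#include <string>

// Toy model of an affine recurrence {Start,+,Step} attached to a named loop.
struct Recurrence {
  long Start;
  long Step;
  std::string Loop;

  // Value of the recurrence on iteration N (pre-increment form).
  long at(long N) const { return Start + Step * N; }
};

// Loops whose post-incremented value is used at this use site.
using PostIncLoopSet = std::set<std::string>;

// "Denormalize": shift the recurrence so that evaluating it at iteration N
// yields the post-incremented value for every loop in the set.
Recurrence denormalize(Recurrence R, const PostIncLoopSet &Loops) {
  if (Loops.count(R.Loop))
    R.Start += R.Step;  // post-inc value = pre-inc value + Step
  return R;
}

// "Normalize": the inverse transform, applied when the use is recorded.
Recurrence normalize(Recurrence R, const PostIncLoopSet &Loops) {
  if (Loops.count(R.Loop))
    R.Start -= R.Step;
  return R;
}

int main() {
  Recurrence IV{/*Start=*/0, /*Step=*/4, /*Loop=*/"L1"};
  PostIncLoopSet Loops{"L1"};

  // Round-tripping through normalize/denormalize is the identity...
  Recurrence RT = denormalize(normalize(IV, Loops), Loops);
  assert(RT.Start == IV.Start && RT.Step == IV.Step);

  // ...and the denormalized form evaluates to the post-incremented value.
  assert(denormalize(IV, Loops).at(3) == IV.at(3) + IV.Step);
  return 0;
}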
@@ -781,10 +781,10 @@ struct LSRFixup {
   /// will be replaced.
   Value *OperandValToReplace;
 
-  /// PostIncLoop - If this user is to use the post-incremented value of an
+  /// PostIncLoops - If this user is to use the post-incremented value of an
   /// induction variable, this variable is non-null and holds the loop
   /// associated with the induction variable.
-  const Loop *PostIncLoop;
+  PostIncLoopSet PostIncLoops;
 
   /// LUIdx - The index of the LSRUse describing the expression which
   /// this fixup needs, minus an offset (below).
@@ -795,6 +795,8 @@ struct LSRFixup {
   /// offsets, for example in an unrolled loop.
   int64_t Offset;
 
+  bool isUseFullyOutsideLoop(const Loop *L) const;
+
   LSRFixup();
 
   void print(raw_ostream &OS) const;
@@ -804,9 +806,24 @@ struct LSRFixup {
 }
 
 LSRFixup::LSRFixup()
-  : UserInst(0), OperandValToReplace(0), PostIncLoop(0),
+  : UserInst(0), OperandValToReplace(0),
     LUIdx(~size_t(0)), Offset(0) {}
 
+/// isUseFullyOutsideLoop - Test whether this fixup always uses its
+/// value outside of the given loop.
+bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
+  // PHI nodes use their value in their incoming blocks.
+  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (PN->getIncomingValue(i) == OperandValToReplace &&
+          L->contains(PN->getIncomingBlock(i)))
+        return false;
+    return true;
+  }
+
+  return !L->contains(UserInst);
+}
+
 void LSRFixup::print(raw_ostream &OS) const {
   OS << "UserInst=";
   // Store is common and interesting enough to be worth special-casing.
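A small aside on the isUseFullyOutsideLoop rule added above: a value feeding a PHI is considered to be used in the corresponding incoming block, not in the block containing the PHI, so a PHI sitting in an exit block still counts as an in-loop use when the incoming edge originates inside the loop. The toy model below (plain C++, not the LLVM API; the Block and Use types are made up for illustration) captures just that rule.

// Illustrative sketch only: toy model of the "fully outside the loop" query.
#include <cassert>
#include <vector>

// A block is either inside or outside the loop of interest.
struct Block { bool InsideLoop; };

// A use is either a plain use in some block, or a PHI incoming value
// arriving over an edge from a particular predecessor block.
struct Use {
  bool IsPhiIncoming;
  const Block *UserBlock;      // block containing the user instruction
  const Block *IncomingBlock;  // only meaningful for PHI incoming values
};

// PHI incoming values "occur" in their incoming block; everything else
// occurs in the block of the user instruction itself.
bool isUseFullyOutsideLoop(const std::vector<Use> &Uses) {
  for (const Use &U : Uses) {
    const Block *Where = U.IsPhiIncoming ? U.IncomingBlock : U.UserBlock;
    if (Where->InsideLoop)
      return false;
  }
  return true;
}

int main() {
  Block Inside{true}, Exit{false};

  // A PHI in the exit block whose value flows in from inside the loop is
  // still an in-loop use for the purposes of this query.
  std::vector<Use> Uses = {{/*IsPhiIncoming=*/true, &Exit, &Inside}};
  assert(!isUseFullyOutsideLoop(Uses));

  // A plain use in the exit block really is outside the loop.
  Uses = {{/*IsPhiIncoming=*/false, &Exit, nullptr}};
  assert(isUseFullyOutsideLoop(Uses));
  return 0;
}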
@@ -821,9 +838,10 @@ void LSRFixup::print(raw_ostream &OS) const {
   OS << ", OperandValToReplace=";
   WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
 
-  if (PostIncLoop) {
+  for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
+       E = PostIncLoops.end(); I != E; ++I) {
     OS << ", PostIncLoop=";
-    WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false);
+    WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
   }
 
   if (LUIdx != ~size_t(0))
@@ -1545,8 +1563,9 @@ LSRInstance::OptimizeLoopTermCond() {
             !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
           // Conservatively assume there may be reuse if the quotient of their
           // strides could be a legal scale.
-          const SCEV *A = CondUse->getStride();
-          const SCEV *B = UI->getStride();
+          const SCEV *A = CondUse->getStride(L);
+          const SCEV *B = UI->getStride(L);
+          if (!A || !B) continue;
           if (SE.getTypeSizeInBits(A->getType()) !=
               SE.getTypeSizeInBits(B->getType())) {
             if (SE.getTypeSizeInBits(A->getType()) >
@@ -1598,7 +1617,7 @@ LSRInstance::OptimizeLoopTermCond() {
         ExitingBlock->getInstList().insert(TermBr, Cond);
 
         // Clone the IVUse, as the old use still exists!
-        CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(),
+        CondUse = &IU.AddUser(CondUse->getExpr(),
                               Cond, CondUse->getOperandValToReplace());
         TermBr->replaceUsesOfWith(OldCond, Cond);
       }
@@ -1607,9 +1626,7 @@ LSRInstance::OptimizeLoopTermCond() {
     // If we get to here, we know that we can transform the setcc instruction to
     // use the post-incremented version of the IV, allowing us to coalesce the
     // live ranges for the IV correctly.
-    CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(),
-                                       CondUse->getStride()));
-    CondUse->setIsUseOfPostIncrementedValue(true);
+    CondUse->transformToPostInc(L);
     Changed = true;
 
     PostIncs.insert(Cond);
@@ -1717,19 +1734,24 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
   SmallSetVector<const SCEV *, 4> Strides;
 
   // Collect interesting types and strides.
+  SmallVector<const SCEV *, 4> Worklist;
   for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
-    const SCEV *Stride = UI->getStride();
+    const SCEV *Expr = UI->getExpr();
 
     // Collect interesting types.
-    Types.insert(SE.getEffectiveSCEVType(Stride->getType()));
+    Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
 
-    // Add the stride for this loop.
-    Strides.insert(Stride);
-
-    // Add strides for other mentioned loops.
-    for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset());
-         AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart()))
-      Strides.insert(AR->getStepRecurrence(SE));
+    // Add strides for mentioned loops.
+    Worklist.push_back(Expr);
+    do {
+      const SCEV *S = Worklist.pop_back_val();
+      if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+        Strides.insert(AR->getStepRecurrence(SE));
+        Worklist.push_back(AR->getStart());
+      } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+        Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end());
+      }
+    } while (!Worklist.empty());
   }
 
   // Compute interesting factors from the set of interesting strides.
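To illustrate the worklist walk above: on a nested recurrence such as {{B,+,S2}<L2>,+,S1}<L1> + X, it peels add-recs and add operands recursively and records every step it meets (both S1 and S2), whereas the old code only followed starts reachable from the offset component. Here is a toy version of the same traversal over a miniature expression tree (plain C++, not SCEV; the node shapes are made up for illustration).

// Illustrative sketch only: worklist collection of steps from nested recurrences.
#include <cassert>
#include <set>
#include <vector>

// Miniature expression tree: constants, adds, and add-recurrences {Start,+,Step}.
struct Expr {
  enum Kind { Const, Add, AddRec } K;
  long Value = 0;                              // Const
  std::vector<const Expr *> Ops;               // Add
  const Expr *Start = nullptr; long Step = 0;  // AddRec
};

// Same shape as the loop in CollectInterestingTypesAndFactors: pop a node,
// record the step of any add-rec, and keep walking starts and add operands.
std::set<long> collectSteps(const Expr *Root) {
  std::set<long> Steps;
  std::vector<const Expr *> Worklist{Root};
  do {
    const Expr *S = Worklist.back();
    Worklist.pop_back();
    if (S->K == Expr::AddRec) {
      Steps.insert(S->Step);
      Worklist.push_back(S->Start);
    } else if (S->K == Expr::Add) {
      Worklist.insert(Worklist.end(), S->Ops.begin(), S->Ops.end());
    }
  } while (!Worklist.empty());
  return Steps;
}

int main() {
  // Build {{7,+,2}<inner>,+,8}<outer> + 5 and check that both steps are found.
  Expr C7{Expr::Const, 7};
  Expr Inner{Expr::AddRec}; Inner.Start = &C7; Inner.Step = 2;
  Expr Outer{Expr::AddRec}; Outer.Start = &Inner; Outer.Step = 8;
  Expr C5{Expr::Const, 5};
  Expr Sum{Expr::Add}; Sum.Ops = {&Outer, &C5};

  std::set<long> Steps = collectSteps(&Sum);
  assert(Steps.count(2) && Steps.count(8));
  return 0;
}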
@@ -1776,8 +1798,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
     LSRFixup &LF = getNewFixup();
     LF.UserInst = UI->getUser();
     LF.OperandValToReplace = UI->getOperandValToReplace();
-    if (UI->isUseOfPostIncrementedValue())
-      LF.PostIncLoop = L;
+    LF.PostIncLoops = UI->getPostIncLoops();
 
     LSRUse::KindType Kind = LSRUse::Basic;
     const Type *AccessTy = 0;
@@ -1786,7 +1807,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
       AccessTy = getAccessType(LF.UserInst);
     }
 
-    const SCEV *S = IU.getCanonicalExpr(*UI);
+    const SCEV *S = UI->getExpr();
 
     // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
    // (N - i == 0), and this allows (N - i) to be the expression that we work
@@ -1824,7 +1845,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
     LF.LUIdx = P.first;
     LF.Offset = P.second;
     LSRUse &LU = Uses[LF.LUIdx];
-    LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst);
+    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
 
     // If this is the first use of this LSRUse, give it a formula.
     if (LU.Formulae.empty()) {
@@ -1936,7 +1957,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
         LF.LUIdx = P.first;
         LF.Offset = P.second;
         LSRUse &LU = Uses[LF.LUIdx];
-        LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst);
+        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
         InsertSupplementalFormula(U, LU, LF.LUIdx);
         CountRegisters(LU.Formulae.back(), Uses.size() - 1);
         break;
@@ -2783,8 +2804,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
                            SmallVectorImpl<WeakVH> &DeadInsts) const {
   const LSRUse &LU = Uses[LF.LUIdx];
 
-  // Then, collect some instructions which we will remain dominated by when
-  // expanding the replacement. These must be dominated by any operands that
+  // Then, collect some instructions which must be dominated by the
+  // expanding replacement. These must be dominated by any operands that
   // will be required in the expansion.
   SmallVector<Instruction *, 4> Inputs;
   if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
@@ -2793,8 +2814,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
     if (Instruction *I =
           dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
       Inputs.push_back(I);
-  if (LF.PostIncLoop) {
-    if (!L->contains(LF.UserInst))
+  if (LF.PostIncLoops.count(L)) {
+    if (LF.isUseFullyOutsideLoop(L))
       Inputs.push_back(L->getLoopLatch()->getTerminator());
     else
       Inputs.push_back(IVIncInsertPos);
@@ -2831,7 +2852,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
 
   // Inform the Rewriter if we have a post-increment use, so that it can
   // perform an advantageous expansion.
-  Rewriter.setPostInc(LF.PostIncLoop);
+  Rewriter.setPostInc(LF.PostIncLoops);
 
   // This is the type that the user actually needs.
   const Type *OpTy = LF.OperandValToReplace->getType();
@@ -2855,24 +2876,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
     const SCEV *Reg = *I;
     assert(!Reg->isZero() && "Zero allocated in a base register!");
 
-    // If we're expanding for a post-inc user for the add-rec's loop, make the
-    // post-inc adjustment.
-    const SCEV *Start = Reg;
-    while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) {
-      if (AR->getLoop() == LF.PostIncLoop) {
-        Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
-        // If the user is inside the loop, insert the code after the increment
-        // so that it is dominated by its operand. If the original insert point
-        // was already dominated by the increment, keep it, because there may
-        // be loop-variant operands that need to be respected also.
-        if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) {
-          IP = IVIncInsertPos;
-          while (isa<DbgInfoIntrinsic>(IP)) ++IP;
-        }
-        break;
-      }
-      Start = AR->getStart();
-    }
+    // If we're expanding for a post-inc user, make the post-inc adjustment.
+    PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+    Reg = TransformForPostIncUse(Denormalize, Reg,
+                                 LF.UserInst, LF.OperandValToReplace,
+                                 Loops, SE, DT);
 
     Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
   }
@@ -2889,11 +2897,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   if (F.AM.Scale != 0) {
     const SCEV *ScaledS = F.ScaledReg;
 
-    // If we're expanding for a post-inc user for the add-rec's loop, make the
-    // post-inc adjustment.
-    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS))
-      if (AR->getLoop() == LF.PostIncLoop)
-        ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE));
+    // If we're expanding for a post-inc user, make the post-inc adjustment.
+    PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+    ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
+                                     LF.UserInst, LF.OperandValToReplace,
+                                     Loops, SE, DT);
 
     if (LU.Kind == LSRUse::ICmpZero) {
       // An interesting way of "folding" with an icmp is to use a negated
@@ -2954,7 +2962,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
 
   // We're done expanding now, so reset the rewriter.
-  Rewriter.setPostInc(0);
+  Rewriter.clearPostInc();
 
   // An ICmpZero Formula represents an ICmp which we're handling as a
   // comparison against zero. Now that we've expanded an expression for that