When determining a canonical insert position, don't climb deeper

into adjacent loops. Also, ensure that the insert position is
dominated by the loop latch of any loop in the post-inc set which
has a latch.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100906 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dan Gohman 2010-04-09 22:07:05 +00:00
parent 347fa3fa26
commit e5f76877ae
2 changed files with 103 additions and 33 deletions

View File

@ -1157,6 +1157,7 @@ class LSRInstance {
IVUsers &IU;
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
const TargetLowering *const TLI;
Loop *const L;
bool Changed;
@ -1236,9 +1237,12 @@ public:
DenseSet<const SCEV *> &VisitedRegs) const;
void Solve(SmallVectorImpl<const Formula *> &Solution) const;
BasicBlock::iterator AdjustInputPositionForExpand(BasicBlock::iterator IP,
const LSRFixup &LF,
const LSRUse &LU) const;
BasicBlock::iterator
HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs) const;
BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
const LSRFixup &LF,
const LSRUse &LU) const;
Value *Expand(const LSRFixup &LF,
const Formula &F,
@ -2813,12 +2817,64 @@ static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) {
return Node->getBlock();
}
/// AdjustInputPositionForExpand - Determine an input position which will be
/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
/// the dominator tree far as we can go while still being dominated by the
/// input positions. This helps canonicalize the insert position, which
/// encourages sharing.
BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs)
const {
for (;;) {
const Loop *IPLoop = LI.getLoopFor(IP->getParent());
unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
BasicBlock *IDom;
for (BasicBlock *Rung = IP->getParent(); ; Rung = IDom) {
IDom = getImmediateDominator(Rung, DT);
if (!IDom) return IP;
// Don't climb into a loop though.
const Loop *IDomLoop = LI.getLoopFor(IDom);
unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
if (IDomDepth <= IPLoopDepth &&
(IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
break;
}
bool AllDominate = true;
Instruction *BetterPos = 0;
Instruction *Tentative = IDom->getTerminator();
for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
E = Inputs.end(); I != E; ++I) {
Instruction *Inst = *I;
if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
AllDominate = false;
break;
}
// Attempt to find an insert position in the middle of the block,
// instead of at the end, so that it can be used for other expansions.
if (IDom == Inst->getParent() &&
(!BetterPos || DT.dominates(BetterPos, Inst)))
BetterPos = next(BasicBlock::iterator(Inst));
}
if (!AllDominate)
break;
if (BetterPos)
IP = BetterPos;
else
IP = Tentative;
}
return IP;
}
/// AdjustInsertPositionForExpand - Determine an input position which will be
/// dominated by the operands and which will dominate the result.
BasicBlock::iterator
LSRInstance::AdjustInputPositionForExpand(BasicBlock::iterator IP,
const LSRFixup &LF,
const LSRUse &LU) const {
LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
const LSRFixup &LF,
const LSRUse &LU) const {
// Collect some instructions which must be dominated by the
// expanding replacement. These must be dominated by any operands that
// will be required in the expansion.
@ -2842,6 +2898,7 @@ LSRInstance::AdjustInputPositionForExpand(BasicBlock::iterator IP,
const Loop *PIL = *I;
if (PIL == L) continue;
// Be dominated by the loop exit.
SmallVector<BasicBlock *, 4> ExitingBlocks;
PIL->getExitingBlocks(ExitingBlocks);
if (!ExitingBlocks.empty()) {
@ -2850,34 +2907,15 @@ LSRInstance::AdjustInputPositionForExpand(BasicBlock::iterator IP,
BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
Inputs.push_back(BB->getTerminator());
}
// Be dominated by the loop latch, if it's unique.
if (BasicBlock *Latch = PIL->getLoopLatch())
Inputs.push_back(prior(BasicBlock::iterator(Latch->getTerminator())));
}
// Then, climb up the immediate dominator tree as far as we can go while
// still being dominated by the input positions.
for (;;) {
bool AllDominate = true;
Instruction *BetterPos = 0;
BasicBlock *IDom = getImmediateDominator(IP->getParent(), DT);
if (!IDom) break;
Instruction *Tentative = IDom->getTerminator();
for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
E = Inputs.end(); I != E; ++I) {
Instruction *Inst = *I;
if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
AllDominate = false;
break;
}
if (IDom == Inst->getParent() &&
(!BetterPos || DT.dominates(BetterPos, Inst)))
BetterPos = next(BasicBlock::iterator(Inst));
}
if (!AllDominate)
break;
if (BetterPos)
IP = BetterPos;
else
IP = Tentative;
}
IP = HoistInsertPosition(IP, Inputs);
// Don't insert instructions before PHI nodes.
while (isa<PHINode>(IP)) ++IP;
@ -2897,7 +2935,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
IP = AdjustInputPositionForExpand(IP, LF, LU);
IP = AdjustInsertPositionForExpand(IP, LF, LU);
// Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion.
@ -3175,6 +3213,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
: IU(P->getAnalysis<IVUsers>()),
SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTree>()),
LI(P->getAnalysis<LoopInfo>()),
TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
@ -3331,9 +3370,10 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
// many analyses if they are around.
AU.addPreservedID(LoopSimplifyID);
AU.addPreserved<LoopInfo>();
AU.addPreserved("domfrontier");
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTree>();
AU.addPreserved<DominatorTree>();

View File

@ -35,3 +35,33 @@ bb14: ; preds = %bb14, %bb10
store i8 undef, i8* %t9
br label %bb14
}
define fastcc void @TransformLine() nounwind {
bb:
br label %loop0
loop0: ; preds = %loop0, %bb
%i0 = phi i32 [ %i0.next, %loop0 ], [ 0, %bb ] ; <i32> [#uses=2]
%i0.next = add i32 %i0, 1 ; <i32> [#uses=1]
br i1 false, label %loop0, label %bb0
bb0: ; preds = %loop0
br label %loop1
loop1: ; preds = %bb5, %bb0
%i1 = phi i32 [ 0, %bb0 ], [ %i1.next, %bb5 ] ; <i32> [#uses=4]
%t0 = add i32 %i0, %i1 ; <i32> [#uses=1]
br i1 false, label %bb2, label %bb6
bb2: ; preds = %loop1
br i1 true, label %bb6, label %bb5
bb5: ; preds = %bb2
%i1.next = add i32 %i1, 1 ; <i32> [#uses=1]
br i1 true, label %bb6, label %loop1
bb6: ; preds = %bb5, %bb2, %loop1
%p8 = phi i32 [ %t0, %bb5 ], [ undef, %loop1 ], [ undef, %bb2 ] ; <i32> [#uses=0]
%p9 = phi i32 [ undef, %bb5 ], [ %i1, %loop1 ], [ %i1, %bb2 ] ; <i32> [#uses=0]
unreachable
}