SCEVExpander fixes. Affects LSR and indvars.

LSR has gradually been improved to more aggressively reuse existing code, particularly existing phi cycles. This exposed problems with the SCEVExpander's sloppy treatment of its insertion point. I applied some rigor to the insertion point problem that will hopefully avoid an endless bug cycle in this area. Changes:

- Always used properlyDominates to check safe code hoisting.

- The insertion point provided to SCEV is now considered a lower bound. This is usually a block terminator or the use itself. Under no cirumstance may SCEVExpander insert below this point.

- LSR is reponsible for finding a "canonical" insertion point across expansion of different expressions.

- Robust logic to determine whether IV increments are in "expanded" form and/or can be safely hoisted above some insertion point.

Fixes PR11783: SCEVExpander assert.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148535 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Andrew Trick 2012-01-20 07:41:13 +00:00
parent 0e2037ba2b
commit b5c26ef9da
6 changed files with 203 additions and 146 deletions

View File

@ -753,6 +753,12 @@ public:
// special checks necessary if A and B are in the same basic block.
bool dominates(const Instruction *A, const Instruction *B) const;
/// properlyDominates - Use this instead of dominates() to determine whether a
/// user of A can be hoisted above B.
bool properlyDominates(const Instruction *A, const Instruction *B) const {
return A != B && dominates(A, B);
}
bool properlyDominates(const DomTreeNode *A, const DomTreeNode *B) const {
return DT->properlyDominates(A, B);
}

View File

@ -114,9 +114,12 @@ namespace llvm {
/// starts at zero and steps by one on each iteration.
PHINode *getOrInsertCanonicalInductionVariable(const Loop *L, Type *Ty);
/// hoistStep - Utility for hoisting an IV increment.
static bool hoistStep(Instruction *IncV, Instruction *InsertPos,
const DominatorTree *DT);
/// getIVIncOperand - Return the induction variable increment's IV operand.
Instruction *getIVIncOperand(Instruction *IncV, Instruction *InsertPos,
bool allowScale);
/// hoistIVInc - Utility for hoisting an IV increment.
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos);
/// replaceCongruentIVs - replace congruent phis with their most canonical
/// representative. Return the number of phis eliminated.
@ -169,6 +172,13 @@ namespace llvm {
Builder.ClearInsertionPoint();
}
/// isInsertedInstruction - Return true if the specified instruction was
/// inserted by the code rewriter. If so, the client should not modify the
/// instruction.
bool isInsertedInstruction(Instruction *I) const {
return InsertedValues.count(I) || InsertedPostIncValues.count(I);
}
void setChainedPhi(PHINode *PN) { ChainedPhis.insert(PN); }
private:
@ -205,13 +215,6 @@ namespace llvm {
/// result will be expanded to have that type, with a cast if necessary.
Value *expandCodeFor(const SCEV *SH, Type *Ty = 0);
/// isInsertedInstruction - Return true if the specified instruction was
/// inserted by the code rewriter. If so, the client should not modify the
/// instruction.
bool isInsertedInstruction(Instruction *I) const {
return InsertedValues.count(I) || InsertedPostIncValues.count(I);
}
/// getRelevantLoop - Determine the most "relevant" loop for the given SCEV.
const Loop *getRelevantLoop(const SCEV *);

View File

@ -38,8 +38,11 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
if (U->getType() == Ty)
if (CastInst *CI = dyn_cast<CastInst>(U))
if (CI->getOpcode() == Op) {
// If the cast isn't where we want it, fix it.
if (BasicBlock::iterator(CI) != IP) {
// If the cast isn't where we want it, fix it. All new or reused
// instructions must strictly dominate the Builder's InsertPt to
// ensure that the expression's expansion dominates its uses.
if (BasicBlock::iterator(CI) != IP
|| IP == Builder.GetInsertPoint()) {
// Create a new cast, and leave the old cast in place in case
// it is being used as an insert point. Clear its operand
// so that it doesn't hold anything live.
@ -867,6 +870,94 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
return isNormalAddRecExprPHI(PN, IncV, L);
}
/// getIVIncOperand returns an induction variable increment's induction
/// variable operand.
///
/// If allowScale is set, any type of GEP is allowed as long as the nonIV
/// operands dominate InsertPos.
///
/// If allowScale is not set, ensure that a GEP increment conforms to one of the
/// simple patterns generated by getAddRecExprPHILiterally and
/// expandAddtoGEP. If the pattern isn't recognized, return NULL.
Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
Instruction *InsertPos,
bool allowScale) {
if (IncV == InsertPos)
return NULL;
switch (IncV->getOpcode()) {
default:
return NULL;
// Check for a simple Add/Sub or GEP of a loop invariant step.
case Instruction::Add:
case Instruction::Sub: {
Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
if (!OInst || SE.DT->properlyDominates(OInst, InsertPos))
return dyn_cast<Instruction>(IncV->getOperand(0));
return NULL;
}
case Instruction::BitCast:
return dyn_cast<Instruction>(IncV->getOperand(0));
case Instruction::GetElementPtr:
for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end();
I != E; ++I) {
if (isa<Constant>(*I))
continue;
if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
if (!SE.DT->properlyDominates(OInst, InsertPos))
return NULL;
}
if (allowScale) {
// allow any kind of GEP as long as it can be hoisted.
continue;
}
// This must be a pointer addition of constants (pretty), which is already
// handled, or some number of address-size elements (ugly). Ugly geps
// have 2 operands. i1* is used by the expander to represent an
// address-size element.
if (IncV->getNumOperands() != 2)
return NULL;
unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
&& IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
return NULL;
break;
}
return dyn_cast<Instruction>(IncV->getOperand(0));
}
}
/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
/// it available to other uses in this loop. Recursively hoist any operands,
/// until we reach a value that dominates InsertPos.
bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
if (SE.DT->properlyDominates(IncV, InsertPos))
return true;
// InsertPos must itself dominate IncV so that IncV's new position satisfies
// its existing users.
if (!SE.DT->dominates(InsertPos->getParent(), IncV->getParent()))
return false;
// Check that the chain of IV operands leading back to Phi can be hoisted.
SmallVector<Instruction*, 4> IVIncs;
for(;;) {
Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
if (!Oper)
return false;
// IncV is safe to hoist.
IVIncs.push_back(IncV);
IncV = Oper;
if (SE.DT->properlyDominates(IncV, InsertPos))
break;
}
for (SmallVectorImpl<Instruction*>::reverse_iterator I = IVIncs.rbegin(),
E = IVIncs.rend(); I != E; ++I) {
(*I)->moveBefore(InsertPos);
}
return true;
}
/// Determine if this cyclic phi is in a form that would have been generated by
/// LSR. We don't care if the phi was actually expanded in this pass, as long
/// as it is in a low-cost form, for example, no implied multiplication. This
@ -874,55 +965,14 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
/// expandAddtoGEP.
bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
const Loop *L) {
if (ChainedPhis.count(PN))
for(Instruction *IVOper = IncV;
(IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
/*allowScale=*/false));) {
if (IVOper == PN)
return true;
switch (IncV->getOpcode()) {
// Check for a simple Add/Sub or GEP of a loop invariant step.
case Instruction::Add:
case Instruction::Sub:
return IncV->getOperand(0) == PN
&& L->isLoopInvariant(IncV->getOperand(1));
case Instruction::BitCast:
IncV = dyn_cast<GetElementPtrInst>(IncV->getOperand(0));
if (!IncV)
return false;
// fall-thru to GEP handling
case Instruction::GetElementPtr: {
// This must be a pointer addition of constants (pretty) or some number of
// address-size elements (ugly).
for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end();
I != E; ++I) {
if (isa<Constant>(*I))
continue;
// ugly geps have 2 operands.
// i1* is used by the expander to represent an address-size element.
if (IncV->getNumOperands() != 2)
return false;
unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
&& IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
return false;
// Ensure the operands dominate the insertion point. I don't know of a
// case when this would not be true, so this is somewhat untested.
if (L == IVIncInsertLoop) {
for (User::op_iterator OI = IncV->op_begin()+1,
OE = IncV->op_end(); OI != OE; ++OI)
if (Instruction *OInst = dyn_cast<Instruction>(OI))
if (!SE.DT->dominates(OInst, IVIncInsertPos))
return false;
}
break;
}
IncV = dyn_cast<Instruction>(IncV->getOperand(0));
if (IncV && IncV->getOpcode() == Instruction::BitCast)
IncV = dyn_cast<Instruction>(IncV->getOperand(0));
return IncV == PN;
}
default:
return false;
}
}
/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
@ -981,16 +1031,12 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (LSRMode) {
if (!isExpandedAddRecExprPHI(PN, IncV, L))
continue;
if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos))
continue;
}
else {
if (!isNormalAddRecExprPHI(PN, IncV, L))
continue;
}
// Ok, the add recurrence looks usable.
// Remember this PHI, even in post-inc mode.
InsertedValues.insert(PN);
// Remember the increment.
rememberInstruction(IncV);
if (L == IVIncInsertLoop)
do {
if (SE.DT->dominates(IncV, IVIncInsertPos))
@ -1001,6 +1047,12 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
IVIncInsertPos = IncV;
IncV = cast<Instruction>(IncV->getOperand(0));
} while (IncV != PN);
}
// Ok, the add recurrence looks usable.
// Remember this PHI, even in post-inc mode.
InsertedValues.insert(PN);
// Remember the increment.
rememberInstruction(IncV);
return PN;
}
}
@ -1404,10 +1456,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
}
Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
Instruction *I) {
BasicBlock::iterator IP = I;
while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP))
++IP;
Instruction *IP) {
Builder.SetInsertPoint(IP->getParent(), IP);
return expandCodeFor(SH, Ty);
}
@ -1445,8 +1494,11 @@ Value *SCEVExpander::expand(const SCEV *S) {
// there) so that it is guaranteed to dominate any user inside the loop.
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
InsertPt = L->getHeader()->getFirstInsertionPt();
while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
while (InsertPt != Builder.GetInsertPoint()
&& (isInsertedInstruction(InsertPt)
|| isa<DbgInfoIntrinsic>(InsertPt))) {
InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
}
break;
}
@ -1481,23 +1533,9 @@ void SCEVExpander::rememberInstruction(Value *I) {
InsertedPostIncValues.insert(I);
else
InsertedValues.insert(I);
// If we just claimed an existing instruction and that instruction had
// been the insert point, adjust the insert point forward so that
// subsequently inserted code will be dominated.
if (Builder.GetInsertPoint() == I) {
BasicBlock::iterator It = cast<Instruction>(I);
do { ++It; } while (isInsertedInstruction(It) ||
isa<DbgInfoIntrinsic>(It));
Builder.SetInsertPoint(Builder.GetInsertBlock(), It);
}
}
void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
// If we acquired more instructions since the old insert point was saved,
// advance past them.
while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I;
Builder.SetInsertPoint(BB, I);
}
@ -1525,49 +1563,6 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
return V;
}
/// hoistStep - Attempt to hoist an IV increment above a potential use.
///
/// To successfully hoist, two criteria must be met:
/// - IncV operands dominate InsertPos and
/// - InsertPos dominates IncV
///
/// Meeting the second condition means that we don't need to check all of IncV's
/// existing uses (it's moving up in the domtree).
///
/// This does not yet recursively hoist the operands, although that would
/// not be difficult.
///
/// This does not require a SCEVExpander instance and could be replaced by a
/// general code-insertion helper.
bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos,
const DominatorTree *DT) {
// Phi nodes are strangely positional but don't follow normal rules for
// instruction dominance. Handle them immediately.
if (isa<PHINode>(InsertPos))
return isa<PHINode>(IncV);
else if (isa<PHINode>(IncV))
return false;
if (DT->dominates(IncV, InsertPos))
return true;
if (!DT->dominates(InsertPos->getParent(), IncV->getParent()))
return false;
if (IncV->mayHaveSideEffects())
return false;
// Attempt to hoist IncV
for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
OI != OE; ++OI) {
Instruction *OInst = dyn_cast<Instruction>(OI);
if (OInst && (OInst == InsertPos || !DT->dominates(OInst, InsertPos)))
return false;
}
IncV->moveBefore(InsertPos);
return true;
}
/// Sort values by integer width for replaceCongruentIVs.
static bool width_descending(Value *lhs, Value *rhs) {
// Put pointers at the back and make sure pointer < pointer = false.
@ -1632,10 +1627,13 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
// If this phi has the same width but is more canonical, replace the
// original with it.
// original with it. As part of the "more canonical" determination,
// respect a prior decision to use an IV chain.
if (OrigPhiRef->getType() == Phi->getType()
&& !isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)
&& isExpandedAddRecExprPHI(Phi, IsomorphicInc, L)) {
&& !(ChainedPhis.count(Phi)
|| isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L))
&& (ChainedPhis.count(Phi)
|| isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
std::swap(OrigPhiRef, Phi);
std::swap(OrigInc, IsomorphicInc);
}
@ -1649,7 +1647,8 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
IsomorphicInc->getType());
if (OrigInc != IsomorphicInc
&& TruncExpr == SE.getSCEV(IsomorphicInc)
&& hoistStep(OrigInc, IsomorphicInc, DT)) {
&& ((isa<PHINode>(OrigInc) && isa<PHINode>(IsomorphicInc))
|| hoistIVInc(OrigInc, IsomorphicInc))) {
DEBUG_WITH_TYPE(DebugType, dbgs()
<< "INDVARS: Eliminated congruent iv.inc: "
<< *IsomorphicInc << '\n');

View File

@ -846,7 +846,7 @@ protected:
const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
Instruction *WidenIVUse(NarrowIVDefUse DU);
Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
@ -990,7 +990,7 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user.
Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) {
Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
if (isa<PHINode>(DU.NarrowUse) &&
@ -1058,7 +1058,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) {
// NarrowUse.
Instruction *WideUse = 0;
if (WideAddRec == WideIncExpr
&& SCEVExpander::hoistStep(WideInc, DU.NarrowUse, DT))
&& Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
WideUse = WideInc;
else {
WideUse = CloneIVUser(DU);
@ -1163,7 +1163,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Process a def-use edge. This may replace the use, so don't hold a
// use_iterator across it.
Instruction *WideUse = WidenIVUse(DU);
Instruction *WideUse = WidenIVUse(DU, Rewriter);
// Follow all def-use edges from the previous narrow use.
if (WideUse)

View File

@ -1573,9 +1573,11 @@ class LSRInstance {
BasicBlock::iterator
HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs) const;
BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
BasicBlock::iterator
AdjustInsertPositionForExpand(BasicBlock::iterator IP,
const LSRFixup &LF,
const LSRUse &LU) const;
const LSRUse &LU,
SCEVExpander &Rewriter) const;
Value *Expand(const LSRFixup &LF,
const Formula &F,
@ -4131,9 +4133,10 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
/// AdjustInsertPositionForExpand - Determine an input position which will be
/// dominated by the operands and which will dominate the result.
BasicBlock::iterator
LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
const LSRFixup &LF,
const LSRUse &LU) const {
const LSRUse &LU,
SCEVExpander &Rewriter) const {
// Collect some instructions which must be dominated by the
// expanding replacement. These must be dominated by any operands that
// will be required in the expansion.
@ -4168,9 +4171,13 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
}
}
assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
&& !isa<DbgInfoIntrinsic>(LowestIP) &&
"Insertion point must be a normal instruction");
// Then, climb up the immediate dominator tree as far as we can go while
// still being dominated by the input positions.
IP = HoistInsertPosition(IP, Inputs);
BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
// Don't insert instructions before PHI nodes.
while (isa<PHINode>(IP)) ++IP;
@ -4181,6 +4188,11 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
// Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
// Set IP below instructions recently inserted by SCEVExpander. This keeps the
// IP consistent across expansions and allows the previously inserted
// instructions to be reused by subsequent expansion.
while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
return IP;
}
@ -4195,7 +4207,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
IP = AdjustInsertPositionForExpand(IP, LF, LU);
IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
// Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion.

View File

@ -48,3 +48,40 @@ cond.false35.i: ; preds = %for.end.i
exit: ; preds = %cond.true29.i, %cond.true.i
ret i32 0
}
%struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771 = type { i32, i32, i32 }
@tags = external global [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], align 16
; CHECK: @test2
; CHECK: %entry
; CHECK-NOT: mov
; CHECK: jne
define void @test2(i32 %n) nounwind uwtable {
entry:
br i1 undef, label %while.end, label %for.cond468
for.cond468: ; preds = %if.then477, %entry
%indvars.iv1163 = phi i64 [ %indvars.iv.next1164, %if.then477 ], [ 1, %entry ]
%k.0.in = phi i32* [ %last, %if.then477 ], [ getelementptr inbounds ([5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 0, i32 2), %entry ]
%k.0 = load i32* %k.0.in, align 4
%0 = trunc i64 %indvars.iv1163 to i32
%cmp469 = icmp slt i32 %0, %n
br i1 %cmp469, label %for.body471, label %for.inc498
for.body471: ; preds = %for.cond468
%first = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 1
%1 = load i32* %first, align 4
br i1 undef, label %if.then477, label %for.inc498
if.then477: ; preds = %for.body471
%last = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 2
%indvars.iv.next1164 = add i64 %indvars.iv1163, 1
br label %for.cond468
for.inc498: ; preds = %for.inc498, %for.body471, %for.cond468
br label %for.inc498
while.end: ; preds = %entry
ret void
}