Make SCEVExpander and LSR more aggressive about hoisting expressions out

of loops.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@97642 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dan Gohman 2010-03-03 05:29:13 +00:00
parent ed78dbafd0
commit 087bd1e3a1
2 changed files with 247 additions and 50 deletions

View File

@ -144,9 +144,28 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
}
}
// Save the original insertion point so we can restore it when we're done.
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) break;
// Ok, move up a level.
Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
}
// If we haven't found this binop, insert it.
Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp");
rememberInstruction(BO);
// Restore the original insert point.
if (SaveInsertBB)
restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return BO;
}
@ -493,12 +512,56 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
}
}
// Save the original insertion point so we can restore it when we're done.
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) break;
// Ok, move up a level.
Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
}
// Emit a GEP.
Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
rememberInstruction(GEP);
// Restore the original insert point.
if (SaveInsertBB)
restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return GEP;
}
// Save the original insertion point so we can restore it when we're done.
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
if (!L->isLoopInvariant(V)) break;
bool AnyIndexNotLoopInvariant = false;
for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
E = GepIndices.end(); I != E; ++I)
if (!L->isLoopInvariant(*I)) {
AnyIndexNotLoopInvariant = true;
break;
}
if (AnyIndexNotLoopInvariant)
break;
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) break;
// Ok, move up a level.
Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
}
// Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
// because ScalarEvolution may have changed the address arithmetic to
// compute a value which is beyond the end of the allocated object.
@ -511,6 +574,11 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
"scevgep");
Ops.push_back(SE.getUnknown(GEP));
rememberInstruction(GEP);
// Restore the original insert point.
if (SaveInsertBB)
restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return expand(SE.getAddExpr(Ops));
}
@ -528,70 +596,179 @@ static bool isNonConstantNegative(const SCEV *F) {
return SC->getValue()->getValue().isNegative();
}
/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
/// SCEV expansion. If they are nested, this is the most nested. If they are
/// neighboring, pick the later.
static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
DominatorTree &DT) {
if (!A) return B;
if (!B) return A;
if (A->contains(B)) return B;
if (B->contains(A)) return A;
if (DT.dominates(A->getHeader(), B->getHeader())) return B;
if (DT.dominates(B->getHeader(), A->getHeader())) return A;
return A; // Arbitrarily break the tie.
}
/// GetRelevantLoop - Get the most relevant loop associated with the given
/// expression, according to PickMostRelevantLoop.
static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI,
DominatorTree &DT) {
if (isa<SCEVConstant>(S))
return 0;
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
return LI.getLoopFor(I->getParent());
return 0;
}
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
const Loop *L = 0;
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
L = AR->getLoop();
for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
I != E; ++I)
L = PickMostRelevantLoop(L, GetRelevantLoop(*I, LI, DT), DT);
return L;
}
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
return GetRelevantLoop(C->getOperand(), LI, DT);
if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S))
return PickMostRelevantLoop(GetRelevantLoop(D->getLHS(), LI, DT),
GetRelevantLoop(D->getRHS(), LI, DT),
DT);
llvm_unreachable("Unexpected SCEV type!");
}
/// LoopCompare - Compare loops by PickMostRelevantLoop.
class LoopCompare {
DominatorTree &DT;
public:
explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
bool operator()(std::pair<const Loop *, const SCEV *> LHS,
std::pair<const Loop *, const SCEV *> RHS) const {
// Compare loops with PickMostRelevantLoop.
if (LHS.first != RHS.first)
return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
// If one operand is a non-constant negative and the other is not,
// put the non-constant negative on the right so that a sub can
// be used instead of a negate and add.
if (isNonConstantNegative(LHS.second)) {
if (!isNonConstantNegative(RHS.second))
return false;
} else if (isNonConstantNegative(RHS.second))
return true;
// Otherwise they are equivalent according to this comparison.
return false;
}
};
Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
int NumOperands = S->getNumOperands();
const Type *Ty = SE.getEffectiveSCEVType(S->getType());
// Find the index of an operand to start with. Choose the operand with
// pointer type, if there is one, or the last operand otherwise.
int PIdx = 0;
for (; PIdx != NumOperands - 1; ++PIdx)
if (S->getOperand(PIdx)->getType()->isPointerTy()) break;
// Collect all the add operands in a loop, along with their associated loops.
// Iterate in reverse so that constants are emitted last, all else equal, and
// so that pointer operands are inserted first, which the code below relies on
// to form more involved GEPs.
SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
E(S->op_begin()); I != E; ++I)
OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT),
*I));
// Expand code for the operand that we chose.
Value *V = expand(S->getOperand(PIdx));
// Sort by loop. Use a stable sort so that constants follow non-constants and
// pointer operands precede non-pointer operands.
std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
// Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
// comments on expandAddToGEP for details.
if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) {
// Take the operand at PIdx out of the list.
const SmallVectorImpl<const SCEV *> &Ops = S->getOperands();
SmallVector<const SCEV *, 8> NewOps;
NewOps.insert(NewOps.end(), Ops.begin(), Ops.begin() + PIdx);
NewOps.insert(NewOps.end(), Ops.begin() + PIdx + 1, Ops.end());
// Make a GEP.
return expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, V);
}
// Otherwise, we'll expand the rest of the SCEVAddExpr as plain integer
// arithmetic.
V = InsertNoopCastOfTo(V, Ty);
// Emit a bunch of add instructions
for (int i = NumOperands-1; i >= 0; --i) {
if (i == PIdx) continue;
const SCEV *Op = S->getOperand(i);
if (isNonConstantNegative(Op)) {
// Emit instructions to add all the operands. Hoist as much as possible
// out of loops, and form meaningful getelementptrs where possible.
Value *Sum = 0;
for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
const Loop *CurLoop = I->first;
const SCEV *Op = I->second;
if (!Sum) {
// This is the first operand. Just expand it.
Sum = expand(Op);
++I;
} else if (const PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
// The running sum expression is a pointer. Try to form a getelementptr
// at this level with that as the base.
SmallVector<const SCEV *, 4> NewOps;
for (; I != E && I->first == CurLoop; ++I)
NewOps.push_back(I->second);
Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
} else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
// The running sum is an integer, and there's a pointer at this level.
// Try to form a getelementptr.
SmallVector<const SCEV *, 4> NewOps;
NewOps.push_back(SE.getUnknown(Sum));
for (++I; I != E && I->first == CurLoop; ++I)
NewOps.push_back(I->second);
Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
} else if (isNonConstantNegative(Op)) {
// Instead of doing a negate and add, just do a subtract.
Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
V = InsertBinop(Instruction::Sub, V, W);
Sum = InsertNoopCastOfTo(Sum, Ty);
Sum = InsertBinop(Instruction::Sub, Sum, W);
++I;
} else {
// A simple add.
Value *W = expandCodeFor(Op, Ty);
V = InsertBinop(Instruction::Add, V, W);
Sum = InsertNoopCastOfTo(Sum, Ty);
// Canonicalize a constant to the RHS.
if (isa<Constant>(Sum)) std::swap(Sum, W);
Sum = InsertBinop(Instruction::Add, Sum, W);
++I;
}
}
return V;
return Sum;
}
Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
const Type *Ty = SE.getEffectiveSCEVType(S->getType());
int FirstOp = 0; // Set if we should emit a subtract.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(0)))
if (SC->getValue()->isAllOnesValue())
FirstOp = 1;
int i = S->getNumOperands()-2;
Value *V = expandCodeFor(S->getOperand(i+1), Ty);
// Collect all the mul operands in a loop, along with their associated loops.
// Iterate in reverse so that constants are emitted last, all else equal.
SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
E(S->op_begin()); I != E; ++I)
OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT),
*I));
// Emit a bunch of multiply instructions
for (; i >= FirstOp; --i) {
Value *W = expandCodeFor(S->getOperand(i), Ty);
V = InsertBinop(Instruction::Mul, V, W);
// Sort by loop. Use a stable sort so that constants follow non-constants.
std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
// Emit instructions to mul all the operands. Hoist as much as possible
// out of loops.
Value *Prod = 0;
for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
const SCEV *Op = I->second;
if (!Prod) {
// This is the first operand. Just expand it.
Prod = expand(Op);
++I;
} else if (Op->isAllOnesValue()) {
// Instead of doing a multiply by negative one, just do a negate.
Prod = InsertNoopCastOfTo(Prod, Ty);
Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
++I;
} else {
// A simple mul.
Value *W = expandCodeFor(Op, Ty);
Prod = InsertNoopCastOfTo(Prod, Ty);
// Canonicalize a constant to the RHS.
if (isa<Constant>(Prod)) std::swap(Prod, W);
Prod = InsertBinop(Instruction::Mul, Prod, W);
++I;
}
}
// -1 * ... ---> 0 - ...
if (FirstOp == 1)
V = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), V);
return V;
return Prod;
}
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {

View File

@ -2874,6 +2874,13 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
}
// Flush the operand list to suppress SCEVExpander hoisting.
if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
// Expand the ScaledReg portion.
Value *ICmpScaledV = 0;
if (F.AM.Scale != 0) {
@ -2900,12 +2907,25 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
SE.getIntegerSCEV(F.AM.Scale,
ScaledS->getType()));
Ops.push_back(ScaledS);
// Flush the operand list to suppress SCEVExpander hoisting.
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
}
// Expand the immediate portions.
if (F.AM.BaseGV)
Ops.push_back(SE.getSCEV(F.AM.BaseGV));
// Expand the GV portion.
if (F.AM.BaseGV) {
Ops.push_back(SE.getUnknown(F.AM.BaseGV));
// Flush the operand list to suppress SCEVExpander hoisting.
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
// Expand the immediate portion.
int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset;
if (Offset != 0) {
if (LU.Kind == LSRUse::ICmpZero) {
@ -2920,7 +2940,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
} else {
// Just add the immediate values. These again are expected to be matched
// as part of the address.
Ops.push_back(SE.getIntegerSCEV(Offset, IntTy));
Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
}
}