Now that mem2reg understands how to cope with a slightly wider set of
uses of an alloca, we can pre-compute promotability while analyzing an
alloca for splitting in SROA. That lets us short-circuit the common case
of a bunch of trivially promotable allocas. This cuts 20% to 30% off the
run time of SROA for typical frontend-generated IR sequences I'm seeing.
It gets the new SROA to within 20% of ScalarRepl for such code. My
current benchmark for these numbers is PR15412, but it fits the general
pattern of IR emitted by Clang so it should be widely applicable.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187323 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chandler Carruth 2013-07-28 08:27:12 +00:00
parent 6c3a95dab5
commit cea60aff34

View File

@ -197,6 +197,18 @@ public:
/// \brief Construct the slices of a particular alloca. /// \brief Construct the slices of a particular alloca.
AllocaSlices(const DataLayout &DL, AllocaInst &AI); AllocaSlices(const DataLayout &DL, AllocaInst &AI);
/// \brief Whether we determined during the trivial analysis of the alloca
/// that it was immediately promotable with mem2reg.
bool isAllocaPromotable() const { return IsAllocaPromotable; }
/// \brief A list of directly stored values when \c isAllocaPromotable is
/// true.
///
/// The contents are undefined if the alloca is not trivially promotable.
/// This is used to detect other allocas which should be iterated on when
/// doing direct promotion.
ArrayRef<Value *> getStoredValues() const { return StoredValues; }
/// \brief Test whether a pointer to the allocation escapes our analysis. /// \brief Test whether a pointer to the allocation escapes our analysis.
/// ///
/// If this is true, the slices are never fully built and should be /// If this is true, the slices are never fully built and should be
@ -253,10 +265,20 @@ private:
class SliceBuilder; class SliceBuilder;
friend class AllocaSlices::SliceBuilder; friend class AllocaSlices::SliceBuilder;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Handle to alloca instruction to simplify method interfaces. /// \brief Handle to alloca instruction to simplify method interfaces.
AllocaInst &AI; AllocaInst &AI;
#endif
/// \brief A flag indicating if the alloca is trivially promotable.
///
/// While walking the alloca's uses we track when the uses exceed what
/// mem2reg can trivially handle. This essentially should match the logic in
/// \c isAllocaPromotable but re-using the existing walk of the pointer uses.
bool IsAllocaPromotable;
/// \brief Storage for stored values.
///
/// Only used while the alloca is trivially promotable.
SmallVector<Value *, 8> StoredValues;
/// \brief The instruction responsible for this alloca not having a known set /// \brief The instruction responsible for this alloca not having a known set
/// of slices. /// of slices.
@ -325,9 +347,9 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
SmallPtrSet<Instruction *, 4> VisitedDeadInsts; SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
public: public:
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &S) SliceBuilder(const DataLayout &DL, AllocaSlices &S)
: PtrUseVisitor<SliceBuilder>(DL), : PtrUseVisitor<SliceBuilder>(DL),
AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), S(S) {} AllocSize(DL.getTypeAllocSize(S.AI.getAllocatedType())), S(S) {}
private: private:
void markAsDead(Instruction &I) { void markAsDead(Instruction &I) {
@ -380,6 +402,15 @@ private:
if (GEPI.use_empty()) if (GEPI.use_empty())
return markAsDead(GEPI); return markAsDead(GEPI);
// FIXME: mem2reg shouldn't care about the nature of the GEP, but instead
// the offsets of the loads. Until then, we short-circuit here for the
// promotable case.
if (GEPI.hasAllZeroIndices())
return Base::enqueueUsers(GEPI);
// Otherwise, there is something in the GEP, so we disable mem2reg and
// accumulate it.
S.IsAllocaPromotable = false;
return Base::visitGetElementPtrInst(GEPI); return Base::visitGetElementPtrInst(GEPI);
} }
@ -396,6 +427,13 @@ private:
bool IsSplittable = bool IsSplittable =
Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize; Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
// mem2reg can only promote non-volatile loads and stores which exactly
// load the alloca (no offset and the right type).
if (IsVolatile || Offset != 0 || Ty != S.AI.getAllocatedType())
S.IsAllocaPromotable = false;
if (S.IsAllocaPromotable)
assert(Offset == 0);
insertUse(I, Offset, Size, IsSplittable); insertUse(I, Offset, Size, IsSplittable);
} }
@ -436,6 +474,9 @@ private:
return markAsDead(SI); return markAsDead(SI);
} }
if (S.IsAllocaPromotable)
S.StoredValues.push_back(ValOp);
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split"); "All simple FCA stores should have been pre-split");
handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
@ -453,6 +494,8 @@ private:
if (!IsOffsetKnown) if (!IsOffsetKnown)
return PI.setAborted(&II); return PI.setAborted(&II);
S.IsAllocaPromotable = false;
insertUse(II, Offset, insertUse(II, Offset,
Length ? Length->getLimitedValue() Length ? Length->getLimitedValue()
: AllocSize - Offset.getLimitedValue(), : AllocSize - Offset.getLimitedValue(),
@ -469,6 +512,8 @@ private:
if (!IsOffsetKnown) if (!IsOffsetKnown)
return PI.setAborted(&II); return PI.setAborted(&II);
S.IsAllocaPromotable = false;
uint64_t RawOffset = Offset.getLimitedValue(); uint64_t RawOffset = Offset.getLimitedValue();
uint64_t Size = Length ? Length->getLimitedValue() uint64_t Size = Length ? Length->getLimitedValue()
: AllocSize - RawOffset; : AllocSize - RawOffset;
@ -529,6 +574,8 @@ private:
return; return;
} }
S.IsAllocaPromotable = false;
Base::visitIntrinsicInst(II); Base::visitIntrinsicInst(II);
} }
@ -603,6 +650,8 @@ private:
return; return;
} }
S.IsAllocaPromotable = false;
insertUse(PN, Offset, PHISize); insertUse(PN, Offset, PHISize);
} }
@ -610,14 +659,18 @@ private:
if (SI.use_empty()) if (SI.use_empty())
return markAsDead(SI); return markAsDead(SI);
if (Value *Result = foldSelectInst(SI)) { if (Value *Result = foldSelectInst(SI)) {
if (Result == *U) if (Result == *U) {
// If the result of the constant fold will be the pointer, recurse // If the result of the constant fold will be the pointer, recurse
// through the select as if we had RAUW'ed it. // through the select as if we had RAUW'ed it.
enqueueUsers(SI); enqueueUsers(SI);
else
// FIXME: mem2reg should support this pattern, but it doesn't.
S.IsAllocaPromotable = false;
} else {
// Otherwise the operand to the select is dead, and we can replace it // Otherwise the operand to the select is dead, and we can replace it
// with undef. // with undef.
S.DeadOperands.push_back(U); S.DeadOperands.push_back(U);
}
return; return;
} }
@ -644,6 +697,8 @@ private:
return; return;
} }
S.IsAllocaPromotable = false;
insertUse(SI, Offset, SelectSize); insertUse(SI, Offset, SelectSize);
} }
@ -654,12 +709,8 @@ private:
}; };
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
: : AI(AI), IsAllocaPromotable(true), PointerEscapingInstr(0) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) SliceBuilder PB(DL, *this);
AI(AI),
#endif
PointerEscapingInstr(0) {
SliceBuilder PB(DL, AI, *this);
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI); SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
if (PtrI.isEscaped() || PtrI.isAborted()) { if (PtrI.isEscaped() || PtrI.isAborted()) {
// FIXME: We should sink the escape vs. abort info into the caller nicely, // FIXME: We should sink the escape vs. abort info into the caller nicely,
@ -3315,6 +3366,24 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
if (S.begin() == S.end()) if (S.begin() == S.end())
return Changed; return Changed;
// Trivially promotable, don't go through the splitting and rewriting.
if (S.isAllocaPromotable()) {
DEBUG(dbgs() << " Directly promoting alloca: " << AI << "\n");
PromotableAllocas.push_back(&AI);
// Walk through the stored values quickly here to handle directly
// promotable allocas that require iterating on other allocas.
ArrayRef<Value *> StoredValues = S.getStoredValues();
for (ArrayRef<Value *>::iterator SVI = StoredValues.begin(),
SVE = StoredValues.end();
SVI != SVE; ++SVI)
if ((*SVI)->getType()->isPointerTy())
if (AllocaInst *SAI =
dyn_cast<AllocaInst>((*SVI)->stripInBoundsOffsets()))
PostPromotionWorklist.insert(SAI);
return true;
}
Changed |= splitAlloca(AI, S); Changed |= splitAlloca(AI, S);
DEBUG(dbgs() << " Speculating PHIs\n"); DEBUG(dbgs() << " Speculating PHIs\n");