mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-03 13:31:05 +00:00
Now that mem2reg understands how to cope with a slightly wider set of
uses of an alloca, we can pre-compute promotability while analyzing an alloca for splitting in SROA. That lets us short-circuit the common case of a bunch of trivially promotable allocas. This cuts 20% to 30% off the run time of SROA for typical frontend-generated IR sequences I'm seeing. It gets the new SROA to within 20% of ScalarRepl for such code. My current benchmark for these numbers is PR15412, but it fits the general pattern of IR emitted by Clang so it should be widely applicable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187323 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6c3a95dab5
commit
cea60aff34
@ -197,6 +197,18 @@ public:
|
|||||||
/// \brief Construct the slices of a particular alloca.
|
/// \brief Construct the slices of a particular alloca.
|
||||||
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
|
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
|
||||||
|
|
||||||
|
/// \brief Whether we determined during the trivial analysis of the alloca
|
||||||
|
/// that it was immediately promotable with mem2reg.
|
||||||
|
bool isAllocaPromotable() const { return IsAllocaPromotable; }
|
||||||
|
|
||||||
|
/// \brief A list of directly stored values when \c isAllocaPromotable is
|
||||||
|
/// true.
|
||||||
|
///
|
||||||
|
/// The contents are undefined if the alloca is not trivially promotable.
|
||||||
|
/// This is used to detect other allocas which should be iterated on when
|
||||||
|
/// doing direct promotion.
|
||||||
|
ArrayRef<Value *> getStoredValues() const { return StoredValues; }
|
||||||
|
|
||||||
/// \brief Test whether a pointer to the allocation escapes our analysis.
|
/// \brief Test whether a pointer to the allocation escapes our analysis.
|
||||||
///
|
///
|
||||||
/// If this is true, the slices are never fully built and should be
|
/// If this is true, the slices are never fully built and should be
|
||||||
@ -253,10 +265,20 @@ private:
|
|||||||
class SliceBuilder;
|
class SliceBuilder;
|
||||||
friend class AllocaSlices::SliceBuilder;
|
friend class AllocaSlices::SliceBuilder;
|
||||||
|
|
||||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
|
||||||
/// \brief Handle to alloca instruction to simplify method interfaces.
|
/// \brief Handle to alloca instruction to simplify method interfaces.
|
||||||
AllocaInst &AI;
|
AllocaInst &AI;
|
||||||
#endif
|
|
||||||
|
/// \brief A flag indicating if the alloca is trivially promotable.
|
||||||
|
///
|
||||||
|
/// While walking the alloca's uses we track when the uses exceed what
|
||||||
|
/// mem2reg can trivially handle. This essentially should match the logic in
|
||||||
|
/// \c isAllocaPromotable but re-using the existing walk of the pointer uses.
|
||||||
|
bool IsAllocaPromotable;
|
||||||
|
|
||||||
|
/// \brief Storage for stored values.
|
||||||
|
///
|
||||||
|
/// Only used while the alloca is trivially promotable.
|
||||||
|
SmallVector<Value *, 8> StoredValues;
|
||||||
|
|
||||||
/// \brief The instruction responsible for this alloca not having a known set
|
/// \brief The instruction responsible for this alloca not having a known set
|
||||||
/// of slices.
|
/// of slices.
|
||||||
@ -325,9 +347,9 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
|
|||||||
SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
|
SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &S)
|
SliceBuilder(const DataLayout &DL, AllocaSlices &S)
|
||||||
: PtrUseVisitor<SliceBuilder>(DL),
|
: PtrUseVisitor<SliceBuilder>(DL),
|
||||||
AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), S(S) {}
|
AllocSize(DL.getTypeAllocSize(S.AI.getAllocatedType())), S(S) {}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void markAsDead(Instruction &I) {
|
void markAsDead(Instruction &I) {
|
||||||
@ -380,6 +402,15 @@ private:
|
|||||||
if (GEPI.use_empty())
|
if (GEPI.use_empty())
|
||||||
return markAsDead(GEPI);
|
return markAsDead(GEPI);
|
||||||
|
|
||||||
|
// FIXME: mem2reg shouldn't care about the nature of the GEP, but instead
|
||||||
|
// the offsets of the loads. Until then, we short-circuit here for the
|
||||||
|
// promotable case.
|
||||||
|
if (GEPI.hasAllZeroIndices())
|
||||||
|
return Base::enqueueUsers(GEPI);
|
||||||
|
|
||||||
|
// Otherwise, there is something in the GEP, so we disable mem2reg and
|
||||||
|
// accumulate it.
|
||||||
|
S.IsAllocaPromotable = false;
|
||||||
return Base::visitGetElementPtrInst(GEPI);
|
return Base::visitGetElementPtrInst(GEPI);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -396,6 +427,13 @@ private:
|
|||||||
bool IsSplittable =
|
bool IsSplittable =
|
||||||
Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
|
Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
|
||||||
|
|
||||||
|
// mem2reg can only promote non-volatile loads and stores which exactly
|
||||||
|
// load the alloca (no offset and the right type).
|
||||||
|
if (IsVolatile || Offset != 0 || Ty != S.AI.getAllocatedType())
|
||||||
|
S.IsAllocaPromotable = false;
|
||||||
|
if (S.IsAllocaPromotable)
|
||||||
|
assert(Offset == 0);
|
||||||
|
|
||||||
insertUse(I, Offset, Size, IsSplittable);
|
insertUse(I, Offset, Size, IsSplittable);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -436,6 +474,9 @@ private:
|
|||||||
return markAsDead(SI);
|
return markAsDead(SI);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (S.IsAllocaPromotable)
|
||||||
|
S.StoredValues.push_back(ValOp);
|
||||||
|
|
||||||
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
|
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
|
||||||
"All simple FCA stores should have been pre-split");
|
"All simple FCA stores should have been pre-split");
|
||||||
handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
|
handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
|
||||||
@ -453,6 +494,8 @@ private:
|
|||||||
if (!IsOffsetKnown)
|
if (!IsOffsetKnown)
|
||||||
return PI.setAborted(&II);
|
return PI.setAborted(&II);
|
||||||
|
|
||||||
|
S.IsAllocaPromotable = false;
|
||||||
|
|
||||||
insertUse(II, Offset,
|
insertUse(II, Offset,
|
||||||
Length ? Length->getLimitedValue()
|
Length ? Length->getLimitedValue()
|
||||||
: AllocSize - Offset.getLimitedValue(),
|
: AllocSize - Offset.getLimitedValue(),
|
||||||
@ -469,6 +512,8 @@ private:
|
|||||||
if (!IsOffsetKnown)
|
if (!IsOffsetKnown)
|
||||||
return PI.setAborted(&II);
|
return PI.setAborted(&II);
|
||||||
|
|
||||||
|
S.IsAllocaPromotable = false;
|
||||||
|
|
||||||
uint64_t RawOffset = Offset.getLimitedValue();
|
uint64_t RawOffset = Offset.getLimitedValue();
|
||||||
uint64_t Size = Length ? Length->getLimitedValue()
|
uint64_t Size = Length ? Length->getLimitedValue()
|
||||||
: AllocSize - RawOffset;
|
: AllocSize - RawOffset;
|
||||||
@ -529,6 +574,8 @@ private:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
S.IsAllocaPromotable = false;
|
||||||
|
|
||||||
Base::visitIntrinsicInst(II);
|
Base::visitIntrinsicInst(II);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -603,6 +650,8 @@ private:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
S.IsAllocaPromotable = false;
|
||||||
|
|
||||||
insertUse(PN, Offset, PHISize);
|
insertUse(PN, Offset, PHISize);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -610,14 +659,18 @@ private:
|
|||||||
if (SI.use_empty())
|
if (SI.use_empty())
|
||||||
return markAsDead(SI);
|
return markAsDead(SI);
|
||||||
if (Value *Result = foldSelectInst(SI)) {
|
if (Value *Result = foldSelectInst(SI)) {
|
||||||
if (Result == *U)
|
if (Result == *U) {
|
||||||
// If the result of the constant fold will be the pointer, recurse
|
// If the result of the constant fold will be the pointer, recurse
|
||||||
// through the select as if we had RAUW'ed it.
|
// through the select as if we had RAUW'ed it.
|
||||||
enqueueUsers(SI);
|
enqueueUsers(SI);
|
||||||
else
|
|
||||||
|
// FIXME: mem2reg should support this pattern, but it doesn't.
|
||||||
|
S.IsAllocaPromotable = false;
|
||||||
|
} else {
|
||||||
// Otherwise the operand to the select is dead, and we can replace it
|
// Otherwise the operand to the select is dead, and we can replace it
|
||||||
// with undef.
|
// with undef.
|
||||||
S.DeadOperands.push_back(U);
|
S.DeadOperands.push_back(U);
|
||||||
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -644,6 +697,8 @@ private:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
S.IsAllocaPromotable = false;
|
||||||
|
|
||||||
insertUse(SI, Offset, SelectSize);
|
insertUse(SI, Offset, SelectSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -654,12 +709,8 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
|
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
|
||||||
:
|
: AI(AI), IsAllocaPromotable(true), PointerEscapingInstr(0) {
|
||||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
SliceBuilder PB(DL, *this);
|
||||||
AI(AI),
|
|
||||||
#endif
|
|
||||||
PointerEscapingInstr(0) {
|
|
||||||
SliceBuilder PB(DL, AI, *this);
|
|
||||||
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
|
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
|
||||||
if (PtrI.isEscaped() || PtrI.isAborted()) {
|
if (PtrI.isEscaped() || PtrI.isAborted()) {
|
||||||
// FIXME: We should sink the escape vs. abort info into the caller nicely,
|
// FIXME: We should sink the escape vs. abort info into the caller nicely,
|
||||||
@ -3315,6 +3366,24 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
|
|||||||
if (S.begin() == S.end())
|
if (S.begin() == S.end())
|
||||||
return Changed;
|
return Changed;
|
||||||
|
|
||||||
|
// Trivially promotable, don't go through the splitting and rewriting.
|
||||||
|
if (S.isAllocaPromotable()) {
|
||||||
|
DEBUG(dbgs() << " Directly promoting alloca: " << AI << "\n");
|
||||||
|
PromotableAllocas.push_back(&AI);
|
||||||
|
|
||||||
|
// Walk through the stored values quickly here to handle directly
|
||||||
|
// promotable allocas that require iterating on other allocas.
|
||||||
|
ArrayRef<Value *> StoredValues = S.getStoredValues();
|
||||||
|
for (ArrayRef<Value *>::iterator SVI = StoredValues.begin(),
|
||||||
|
SVE = StoredValues.end();
|
||||||
|
SVI != SVE; ++SVI)
|
||||||
|
if ((*SVI)->getType()->isPointerTy())
|
||||||
|
if (AllocaInst *SAI =
|
||||||
|
dyn_cast<AllocaInst>((*SVI)->stripInBoundsOffsets()))
|
||||||
|
PostPromotionWorklist.insert(SAI);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
Changed |= splitAlloca(AI, S);
|
Changed |= splitAlloca(AI, S);
|
||||||
|
|
||||||
DEBUG(dbgs() << " Speculating PHIs\n");
|
DEBUG(dbgs() << " Speculating PHIs\n");
|
||||||
|
Loading…
Reference in New Issue
Block a user