Optimize InstCombine stack memory consumption
This patch reduces the stack memory consumption of the InstCombine
function "isOnlyCopiedFromConstantGlobal()", which under certain
conditions could overflow the stack because of excessive recursion.
For example, in a case like this:

  %0 = alloca [50025 x i32], align 4
  %1 = getelementptr inbounds [50025 x i32]* %0, i64 0, i64 0
  store i32 0, i32* %1
  %2 = getelementptr inbounds i32* %1, i64 1
  store i32 1, i32* %2
  %3 = getelementptr inbounds i32* %2, i64 1
  store i32 2, i32* %3
  %4 = getelementptr inbounds i32* %3, i64 1
  store i32 3, i32* %4
  %5 = getelementptr inbounds i32* %4, i64 1
  store i32 4, i32* %5
  %6 = getelementptr inbounds i32* %5, i64 1
  store i32 5, i32* %6
  ...

This piece of code crashes LLVM when InstCombine is applied to it on a
desktop machine; on embedded devices the crash can occur at a much lower
recursion depth. Some instructions (getelementptr and bitcasts) make the
function call itself recursively on their uses, which is what makes the
example above consume so much stack: it becomes a recursive depth-first
tree visit with a very large depth.

The patch changes the algorithm to a semantically equivalent but
iterative one, and changes the visiting order from depth-first to
breadth-first (all the instructions of the current level are visited
before those of the next one). If a lot of memory is required, it is now
allocated on the heap instead of the stack, avoiding the possible crash.

Reviewed By: rnk

Differential Revision: http://reviews.llvm.org/D4355

Patch by Marcello Maggioni!

We don't generally commit large stress tests that look for out-of-memory
conditions, so I didn't request that one be added to the patch.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212133 91177308-0d34-0410-b5e6-96231b3b80d8
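[Editor's note] The shape of the fix is the standard recursion-to-worklist
transformation, sketched minimally below. The Node type and visitUsers()
are hypothetical stand-ins for llvm::Value and the real visitor, not code
from the patch; the (pointer, IsOffset) pairs mirror the ValuesToInspect
entries in the diff further down.

  // Hypothetical sketch of the recursion-to-worklist pattern: the
  // vector's storage lives on the heap, so visit depth is bounded by
  // available memory rather than by the thread's stack size.
  #include <utility>
  #include <vector>

  struct Node {
    std::vector<Node *> Users; // stand-in for a value's use list
    bool ZeroOffset = true;    // stand-in for GEP->hasAllZeroIndices()
  };

  bool visitUsers(Node *Root) {
    std::vector<std::pair<Node *, bool>> Worklist; // (node, IsOffset)
    Worklist.push_back(std::make_pair(Root, false));
    while (!Worklist.empty()) {
      std::pair<Node *, bool> P = Worklist.back();
      Worklist.pop_back();
      const bool IsOffset = P.second;
      for (Node *User : P.first->Users) {
        // Where the recursive version would call itself on each use, the
        // iterative version defers the use to the worklist instead,
        // threading the accumulated IsOffset flag through the pair.
        Worklist.push_back(
            std::make_pair(User, IsOffset || !User->ZeroOffset));
      }
    }
    return true;
  }

The patch itself uses a SmallVector with an inline capacity of 35 pairs,
so shallow use-trees are still handled entirely on the stack and only
unusually deep chains spill to a heap allocation.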
parent a96332b9c7
commit 3ca3826528
@@ -50,99 +50,102 @@ static bool pointsToConstantGlobal(Value *V) {
 /// can optimize this.
 static bool
 isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
-                               SmallVectorImpl<Instruction *> &ToDelete,
-                               bool IsOffset = false) {
+                               SmallVectorImpl<Instruction *> &ToDelete) {
   // We track lifetime intrinsics as we encounter them. If we decide to go
   // ahead and replace the value with the global, this lets the caller quickly
   // eliminate the markers.
 
-  for (Use &U : V->uses()) {
-    Instruction *I = cast<Instruction>(U.getUser());
-
-    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-      // Ignore non-volatile loads, they are always ok.
-      if (!LI->isSimple()) return false;
-      continue;
-    }
-
-    if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
-      // If uses of the bitcast are ok, we are ok.
-      if (!isOnlyCopiedFromConstantGlobal(I, TheCopy, ToDelete, IsOffset))
-        return false;
-      continue;
-    }
-    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
-      // If the GEP has all zero indices, it doesn't offset the pointer. If it
-      // doesn't, it does.
-      if (!isOnlyCopiedFromConstantGlobal(
-              GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices()))
-        return false;
-      continue;
-    }
-
-    if (CallSite CS = I) {
-      // If this is the function being called then we treat it like a load and
-      // ignore it.
-      if (CS.isCallee(&U))
-        continue;
-
-      // Inalloca arguments are clobbered by the call.
-      unsigned ArgNo = CS.getArgumentNo(&U);
-      if (CS.isInAllocaArgument(ArgNo))
-        return false;
-
-      // If this is a readonly/readnone call site, then we know it is just a
-      // load (but one that potentially returns the value itself), so we can
-      // ignore it if we know that the value isn't captured.
-      if (CS.onlyReadsMemory() &&
-          (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
-        continue;
-
-      // If this is being passed as a byval argument, the caller is making a
-      // copy, so it is only a read of the alloca.
-      if (CS.isByValArgument(ArgNo))
-        continue;
-    }
-
-    // Lifetime intrinsics can be handled by the caller.
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
-          II->getIntrinsicID() == Intrinsic::lifetime_end) {
-        assert(II->use_empty() && "Lifetime markers have no result to use!");
-        ToDelete.push_back(II);
-        continue;
-      }
-    }
-
-    // If this is isn't our memcpy/memmove, reject it as something we can't
-    // handle.
-    MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
-    if (!MI)
-      return false;
-
-    // If the transfer is using the alloca as a source of the transfer, then
-    // ignore it since it is a load (unless the transfer is volatile).
-    if (U.getOperandNo() == 1) {
-      if (MI->isVolatile()) return false;
-      continue;
-    }
-
-    // If we already have seen a copy, reject the second one.
-    if (TheCopy) return false;
-
-    // If the pointer has been offset from the start of the alloca, we can't
-    // safely handle this.
-    if (IsOffset) return false;
-
-    // If the memintrinsic isn't using the alloca as the dest, reject it.
-    if (U.getOperandNo() != 0) return false;
-
-    // If the source of the memcpy/move is not a constant global, reject it.
-    if (!pointsToConstantGlobal(MI->getSource()))
-      return false;
-
-    // Otherwise, the transform is safe. Remember the copy instruction.
-    TheCopy = MI;
+  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
+  ValuesToInspect.push_back(std::make_pair(V, false));
+  while (!ValuesToInspect.empty()) {
+    auto ValuePair = ValuesToInspect.pop_back_val();
+    const bool IsOffset = ValuePair.second;
+    for (auto &U : ValuePair.first->uses()) {
+      Instruction *I = cast<Instruction>(U.getUser());
+
+      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+        // Ignore non-volatile loads, they are always ok.
+        if (!LI->isSimple()) return false;
+        continue;
+      }
+
+      if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
+        // If uses of the bitcast are ok, we are ok.
+        ValuesToInspect.push_back(std::make_pair(I, IsOffset));
+        continue;
+      }
+      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+        // If the GEP has all zero indices, it doesn't offset the pointer. If it
+        // doesn't, it does.
+        ValuesToInspect.push_back(
+            std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
+        continue;
+      }
+
+      if (CallSite CS = I) {
+        // If this is the function being called then we treat it like a load and
+        // ignore it.
+        if (CS.isCallee(&U))
+          continue;
+
+        // Inalloca arguments are clobbered by the call.
+        unsigned ArgNo = CS.getArgumentNo(&U);
+        if (CS.isInAllocaArgument(ArgNo))
+          return false;
+
+        // If this is a readonly/readnone call site, then we know it is just a
+        // load (but one that potentially returns the value itself), so we can
+        // ignore it if we know that the value isn't captured.
+        if (CS.onlyReadsMemory() &&
+            (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
+          continue;
+
+        // If this is being passed as a byval argument, the caller is making a
+        // copy, so it is only a read of the alloca.
+        if (CS.isByValArgument(ArgNo))
+          continue;
+      }
+
+      // Lifetime intrinsics can be handled by the caller.
+      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+            II->getIntrinsicID() == Intrinsic::lifetime_end) {
+          assert(II->use_empty() && "Lifetime markers have no result to use!");
+          ToDelete.push_back(II);
+          continue;
+        }
+      }
+
+      // If this is isn't our memcpy/memmove, reject it as something we can't
+      // handle.
+      MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
+      if (!MI)
+        return false;
+
+      // If the transfer is using the alloca as a source of the transfer, then
+      // ignore it since it is a load (unless the transfer is volatile).
+      if (U.getOperandNo() == 1) {
+        if (MI->isVolatile()) return false;
+        continue;
+      }
+
+      // If we already have seen a copy, reject the second one.
+      if (TheCopy) return false;
+
+      // If the pointer has been offset from the start of the alloca, we can't
+      // safely handle this.
+      if (IsOffset) return false;
+
+      // If the memintrinsic isn't using the alloca as the dest, reject it.
+      if (U.getOperandNo() != 0) return false;
+
+      // If the source of the memcpy/move is not a constant global, reject it.
+      if (!pointsToConstantGlobal(MI->getSource()))
+        return false;
+
+      // Otherwise, the transform is safe. Remember the copy instruction.
+      TheCopy = MI;
+    }
   }
   return true;
 }