mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-30 17:33:24 +00:00
Prevent inlining of callees which allocate lots of memory into a recursive caller.
Example: void foo() { ... foo(); // I'm recursive! bar(); } bar() { int a[1000]; // large stack size } rdar://10853263 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164207 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
93ba133906
commit
92df026f0d
@ -36,6 +36,9 @@ namespace llvm {
|
|||||||
const int LastCallToStaticBonus = -15000;
|
const int LastCallToStaticBonus = -15000;
|
||||||
const int ColdccPenalty = 2000;
|
const int ColdccPenalty = 2000;
|
||||||
const int NoreturnPenalty = 10000;
|
const int NoreturnPenalty = 10000;
|
||||||
|
/// Do not inline functions which allocate this many bytes on the stack
|
||||||
|
/// when the caller is recursive.
|
||||||
|
const int TotalAllocaSizeRecursiveCaller = 1024;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \brief Represents the cost of inlining a function.
|
/// \brief Represents the cost of inlining a function.
|
||||||
|
@ -51,9 +51,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
|||||||
int Cost;
|
int Cost;
|
||||||
const bool AlwaysInline;
|
const bool AlwaysInline;
|
||||||
|
|
||||||
bool IsRecursive;
|
bool IsCallerRecursive;
|
||||||
|
bool IsRecursiveCall;
|
||||||
bool ExposesReturnsTwice;
|
bool ExposesReturnsTwice;
|
||||||
bool HasDynamicAlloca;
|
bool HasDynamicAlloca;
|
||||||
|
/// Number of bytes allocated statically by the callee.
|
||||||
|
uint64_t AllocatedSize;
|
||||||
unsigned NumInstructions, NumVectorInstructions;
|
unsigned NumInstructions, NumVectorInstructions;
|
||||||
int FiftyPercentVectorBonus, TenPercentVectorBonus;
|
int FiftyPercentVectorBonus, TenPercentVectorBonus;
|
||||||
int VectorBonus;
|
int VectorBonus;
|
||||||
@ -126,7 +129,8 @@ public:
|
|||||||
CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold)
|
CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold)
|
||||||
: TD(TD), F(Callee), Threshold(Threshold), Cost(0),
|
: TD(TD), F(Callee), Threshold(Threshold), Cost(0),
|
||||||
AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)),
|
AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)),
|
||||||
IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false),
|
IsCallerRecursive(false), IsRecursiveCall(false),
|
||||||
|
ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0),
|
||||||
NumInstructions(0), NumVectorInstructions(0),
|
NumInstructions(0), NumVectorInstructions(0),
|
||||||
FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
|
FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
|
||||||
NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
|
NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
|
||||||
@ -269,6 +273,13 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
|
|||||||
// FIXME: Check whether inlining will turn a dynamic alloca into a static
|
// FIXME: Check whether inlining will turn a dynamic alloca into a static
|
||||||
// alloca, and handle that case.
|
// alloca, and handle that case.
|
||||||
|
|
||||||
|
// Accumulate the allocated size.
|
||||||
|
if (I.isStaticAlloca()) {
|
||||||
|
Type *Ty = I.getAllocatedType();
|
||||||
|
AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) :
|
||||||
|
Ty->getPrimitiveSizeInBits());
|
||||||
|
}
|
||||||
|
|
||||||
// We will happily inline static alloca instructions or dynamic alloca
|
// We will happily inline static alloca instructions or dynamic alloca
|
||||||
// instructions in always-inline situations.
|
// instructions in always-inline situations.
|
||||||
if (AlwaysInline || I.isStaticAlloca())
|
if (AlwaysInline || I.isStaticAlloca())
|
||||||
@ -625,7 +636,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
|
|||||||
if (F == CS.getInstruction()->getParent()->getParent()) {
|
if (F == CS.getInstruction()->getParent()->getParent()) {
|
||||||
// This flag will fully abort the analysis, so don't bother with anything
|
// This flag will fully abort the analysis, so don't bother with anything
|
||||||
// else.
|
// else.
|
||||||
IsRecursive = true;
|
IsRecursiveCall = true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -712,7 +723,14 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
|
|||||||
Cost += InlineConstants::InstrCost;
|
Cost += InlineConstants::InstrCost;
|
||||||
|
|
||||||
// If the visit this instruction detected an uninlinable pattern, abort.
|
// If the visit this instruction detected an uninlinable pattern, abort.
|
||||||
if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
|
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// If the caller is a recursive function then we don't want to inline
|
||||||
|
// functions which allocate a lot of stack space because it would increase
|
||||||
|
// the caller stack usage dramatically.
|
||||||
|
if (IsCallerRecursive &&
|
||||||
|
AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (NumVectorInstructions > NumInstructions/2)
|
if (NumVectorInstructions > NumInstructions/2)
|
||||||
@ -831,12 +849,14 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
|
|||||||
Cost += InlineConstants::LastCallToStaticBonus;
|
Cost += InlineConstants::LastCallToStaticBonus;
|
||||||
|
|
||||||
// If the instruction after the call, or if the normal destination of the
|
// If the instruction after the call, or if the normal destination of the
|
||||||
// invoke is an unreachable instruction, the function is noreturn. As such,
|
// invoke is an unreachable instruction, the function is noreturn. As such,
|
||||||
// there is little point in inlining this unless there is literally zero cost.
|
// there is little point in inlining this unless there is literally zero
|
||||||
if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
|
// cost.
|
||||||
|
Instruction *Instr = CS.getInstruction();
|
||||||
|
if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
|
||||||
if (isa<UnreachableInst>(II->getNormalDest()->begin()))
|
if (isa<UnreachableInst>(II->getNormalDest()->begin()))
|
||||||
Threshold = 1;
|
Threshold = 1;
|
||||||
} else if (isa<UnreachableInst>(++BasicBlock::iterator(CS.getInstruction())))
|
} else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
|
||||||
Threshold = 1;
|
Threshold = 1;
|
||||||
|
|
||||||
// If this function uses the coldcc calling convention, prefer not to inline
|
// If this function uses the coldcc calling convention, prefer not to inline
|
||||||
@ -852,6 +872,20 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
|
|||||||
if (F.empty())
|
if (F.empty())
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
Function *Caller = CS.getInstruction()->getParent()->getParent();
|
||||||
|
// Check if the caller function is recursive itself.
|
||||||
|
for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end();
|
||||||
|
U != E; ++U) {
|
||||||
|
CallSite Site(cast<Value>(*U));
|
||||||
|
if (!Site)
|
||||||
|
continue;
|
||||||
|
Instruction *I = Site.getInstruction();
|
||||||
|
if (I->getParent()->getParent() == Caller) {
|
||||||
|
IsCallerRecursive = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Track whether we've seen a return instruction. The first return
|
// Track whether we've seen a return instruction. The first return
|
||||||
// instruction is free, as at least one will usually disappear in inlining.
|
// instruction is free, as at least one will usually disappear in inlining.
|
||||||
bool HasReturn = false;
|
bool HasReturn = false;
|
||||||
@ -908,9 +942,9 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
|
|||||||
|
|
||||||
// We never want to inline functions that contain an indirectbr. This is
|
// We never want to inline functions that contain an indirectbr. This is
|
||||||
// incorrect because all the blockaddress's (in static global initializers
|
// incorrect because all the blockaddress's (in static global initializers
|
||||||
// for example) would be referring to the original function, and this indirect
|
// for example) would be referring to the original function, and this
|
||||||
// jump would jump from the inlined copy of the function into the original
|
// indirect jump would jump from the inlined copy of the function into the
|
||||||
// function which is extremely undefined behavior.
|
// original function which is extremely undefined behavior.
|
||||||
// FIXME: This logic isn't really right; we can safely inline functions
|
// FIXME: This logic isn't really right; we can safely inline functions
|
||||||
// with indirectbr's as long as no other function or global references the
|
// with indirectbr's as long as no other function or global references the
|
||||||
// blockaddress of a block within the current function. And as a QOI issue,
|
// blockaddress of a block within the current function. And as a QOI issue,
|
||||||
@ -928,8 +962,16 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
|
|||||||
// Analyze the cost of this block. If we blow through the threshold, this
|
// Analyze the cost of this block. If we blow through the threshold, this
|
||||||
// returns false, and we can bail on out.
|
// returns false, and we can bail on out.
|
||||||
if (!analyzeBlock(BB)) {
|
if (!analyzeBlock(BB)) {
|
||||||
if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
|
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// If the caller is a recursive function then we don't want to inline
|
||||||
|
// functions which allocate a lot of stack space because it would increase
|
||||||
|
// the caller stack usage dramatically.
|
||||||
|
if (IsCallerRecursive &&
|
||||||
|
AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
|
||||||
|
return false;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -955,7 +997,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
|
|||||||
|
|
||||||
// If we're unable to select a particular successor, just count all of
|
// If we're unable to select a particular successor, just count all of
|
||||||
// them.
|
// them.
|
||||||
for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx)
|
for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
|
||||||
|
++TIdx)
|
||||||
BBWorklist.insert(TI->getSuccessor(TIdx));
|
BBWorklist.insert(TI->getSuccessor(TIdx));
|
||||||
|
|
||||||
// If we had any successors at this point, than post-inlining is likely to
|
// If we had any successors at this point, than post-inlining is likely to
|
||||||
@ -1003,7 +1046,8 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee,
|
|||||||
Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline())
|
Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline())
|
||||||
return llvm::InlineCost::getNever();
|
return llvm::InlineCost::getNever();
|
||||||
|
|
||||||
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n");
|
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
|
||||||
|
<< "...\n");
|
||||||
|
|
||||||
CallAnalyzer CA(TD, *Callee, Threshold);
|
CallAnalyzer CA(TD, *Callee, Threshold);
|
||||||
bool ShouldInline = CA.analyzeCall(CS);
|
bool ShouldInline = CA.analyzeCall(CS);
|
||||||
|
38
test/Transforms/Inline/recurseive.ll
Normal file
38
test/Transforms/Inline/recurseive.ll
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
; RUN: opt %s -inline -S | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||||
|
target triple = "i386-apple-darwin10.0"
|
||||||
|
|
||||||
|
; rdar://10853263
|
||||||
|
|
||||||
|
; Make sure that the callee is still here.
|
||||||
|
; CHECK: define i32 @callee
|
||||||
|
define i32 @callee(i32 %param) {
|
||||||
|
%yyy = alloca [100000 x i8]
|
||||||
|
%r = bitcast [100000 x i8]* %yyy to i8*
|
||||||
|
call void @foo2(i8* %r)
|
||||||
|
ret i32 4
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: define i32 @caller
|
||||||
|
; CHECK-NEXT: entry:
|
||||||
|
; CHECK-NOT: alloca
|
||||||
|
; CHECK: ret
|
||||||
|
define i32 @caller(i32 %param) {
|
||||||
|
entry:
|
||||||
|
%t = call i32 @foo(i32 %param)
|
||||||
|
%cmp = icmp eq i32 %t, -1
|
||||||
|
br i1 %cmp, label %exit, label %cont
|
||||||
|
|
||||||
|
cont:
|
||||||
|
%r = call i32 @caller(i32 %t)
|
||||||
|
%f = call i32 @callee(i32 %r)
|
||||||
|
br label %cont
|
||||||
|
exit:
|
||||||
|
ret i32 4
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @foo2(i8* %in)
|
||||||
|
|
||||||
|
declare i32 @foo(i32 %param)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user