mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-01 15:11:24 +00:00
When considering whether to inline Callee into Caller,
and that will make Caller too big to inline, see if it might be better to inline Caller into its callers instead. This situation is described in PR 2973, although I haven't tried the specific case in SPASS. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@83602 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3645b01002
commit
e91b9a3b59
@ -189,9 +189,9 @@ bool Inliner::shouldInline(CallSite CS) {
|
||||
|
||||
int Cost = IC.getValue();
|
||||
int CurrentThreshold = InlineThreshold;
|
||||
Function *Fn = CS.getCaller();
|
||||
if (Fn && !Fn->isDeclaration() &&
|
||||
Fn->hasFnAttr(Attribute::OptimizeForSize) &&
|
||||
Function *Caller = CS.getCaller();
|
||||
if (Caller && !Caller->isDeclaration() &&
|
||||
Caller->hasFnAttr(Attribute::OptimizeForSize) &&
|
||||
InlineLimit.getNumOccurrences() == 0 &&
|
||||
InlineThreshold != 50)
|
||||
CurrentThreshold = 50;
|
||||
@ -203,8 +203,72 @@ bool Inliner::shouldInline(CallSite CS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to detect the case where the current inlining candidate caller
|
||||
// (call it B) is a static function and is an inlining candidate elsewhere,
|
||||
// and the current candidate callee (call it C) is large enough that
|
||||
// inlining it into B would make B too big to inline later. In these
|
||||
// circumstances it may be best not to inline C into B, but to inline B
|
||||
// into its callers.
|
||||
if (Caller->hasLocalLinkage()) {
|
||||
int TotalSecondaryCost = 0;
|
||||
bool outerCallsFound = false;
|
||||
bool allOuterCallsWillBeInlined = true;
|
||||
bool someOuterCallWouldNotBeInlined = false;
|
||||
for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
|
||||
I != E; ++I) {
|
||||
CallSite CS2 = CallSite::get(*I);
|
||||
|
||||
// If this isn't a call to Caller (it could be some other sort
|
||||
// of reference) skip it.
|
||||
if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller)
|
||||
continue;
|
||||
|
||||
InlineCost IC2 = getInlineCost(CS2);
|
||||
if (IC2.isNever())
|
||||
allOuterCallsWillBeInlined = false;
|
||||
if (IC2.isAlways() || IC2.isNever())
|
||||
continue;
|
||||
|
||||
outerCallsFound = true;
|
||||
int Cost2 = IC2.getValue();
|
||||
int CurrentThreshold2 = InlineThreshold;
|
||||
Function *Caller2 = CS2.getCaller();
|
||||
if (Caller2 && !Caller2->isDeclaration() &&
|
||||
Caller2->hasFnAttr(Attribute::OptimizeForSize) &&
|
||||
InlineThreshold != 50)
|
||||
CurrentThreshold2 = 50;
|
||||
|
||||
float FudgeFactor2 = getInlineFudgeFactor(CS2);
|
||||
|
||||
if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
|
||||
allOuterCallsWillBeInlined = false;
|
||||
|
||||
// See if we have this case. The magic 6 is what InlineCost assigns
|
||||
// for the call instruction, which we would be deleting.
|
||||
if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
|
||||
Cost2 + Cost - 6 >= (int)(CurrentThreshold2 * FudgeFactor2)) {
|
||||
someOuterCallWouldNotBeInlined = true;
|
||||
TotalSecondaryCost += Cost2;
|
||||
}
|
||||
}
|
||||
// If all outer calls to Caller would get inlined, the cost for the last
|
||||
// one is set very low by getInlineCost, in anticipation that Caller will
|
||||
// be removed entirely. We did not account for this above unless there
|
||||
// is only one caller of Caller.
|
||||
if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end())
|
||||
TotalSecondaryCost -= 15000;
|
||||
|
||||
if (outerCallsFound && someOuterCallWouldNotBeInlined &&
|
||||
TotalSecondaryCost < Cost) {
|
||||
DEBUG(errs() << " NOT Inlining: " << *CS.getInstruction() <<
|
||||
" Cost = " << Cost <<
|
||||
", outer Cost = " << TotalSecondaryCost << '\n');
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG(errs() << " Inlining: cost=" << Cost
|
||||
<< ", Call: " << *CS.getInstruction() << "\n");
|
||||
<< ", Call: " << *CS.getInstruction() << '\n');
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -232,7 +296,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
|
||||
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
|
||||
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
|
||||
CallSite CS = CallSite::get(I);
|
||||
// If this this isn't a call, or it is a call to an intrinsic, it can
|
||||
// If this isn't a call, or it is a call to an intrinsic, it can
|
||||
// never be inlined.
|
||||
if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
|
||||
continue;
|
||||
@ -279,7 +343,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
|
||||
// try to do so.
|
||||
if (!shouldInline(CS))
|
||||
continue;
|
||||
|
||||
|
||||
Function *Caller = CS.getCaller();
|
||||
// Attempt to inline the function...
|
||||
if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
|
||||
|
111
test/Transforms/Inline/nested-inline.ll
Normal file
111
test/Transforms/Inline/nested-inline.ll
Normal file
@ -0,0 +1,111 @@
|
||||
; RUN: opt < %s -inline -S | FileCheck %s
|
||||
; Test that bar and bar2 are both inlined throughout and removed.
|
||||
@A = weak global i32 0 ; <i32*> [#uses=1]
|
||||
@B = weak global i32 0 ; <i32*> [#uses=1]
|
||||
@C = weak global i32 0 ; <i32*> [#uses=1]
|
||||
|
||||
define fastcc void @foo(i32 %X) {
|
||||
entry:
|
||||
; CHECK: @foo
|
||||
%ALL = alloca i32, align 4 ; <i32*> [#uses=1]
|
||||
%tmp1 = and i32 %X, 1 ; <i32> [#uses=1]
|
||||
%tmp1.upgrd.1 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
|
||||
br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true
|
||||
|
||||
cond_true: ; preds = %entry
|
||||
store i32 1, i32* @A
|
||||
br label %cond_next
|
||||
|
||||
cond_next: ; preds = %cond_true, %entry
|
||||
%tmp4 = and i32 %X, 2 ; <i32> [#uses=1]
|
||||
%tmp4.upgrd.2 = icmp eq i32 %tmp4, 0 ; <i1> [#uses=1]
|
||||
br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5
|
||||
|
||||
cond_true5: ; preds = %cond_next
|
||||
store i32 1, i32* @B
|
||||
br label %cond_next7
|
||||
|
||||
cond_next7: ; preds = %cond_true5, %cond_next
|
||||
%tmp10 = and i32 %X, 4 ; <i32> [#uses=1]
|
||||
%tmp10.upgrd.3 = icmp eq i32 %tmp10, 0 ; <i1> [#uses=1]
|
||||
br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11
|
||||
|
||||
cond_true11: ; preds = %cond_next7
|
||||
store i32 1, i32* @C
|
||||
br label %cond_next13
|
||||
|
||||
cond_next13: ; preds = %cond_true11, %cond_next7
|
||||
%tmp16 = and i32 %X, 8 ; <i32> [#uses=1]
|
||||
%tmp16.upgrd.4 = icmp eq i32 %tmp16, 0 ; <i1> [#uses=1]
|
||||
br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17
|
||||
|
||||
cond_true17: ; preds = %cond_next13
|
||||
call void @ext( i32* %ALL )
|
||||
ret void
|
||||
|
||||
UnifiedReturnBlock: ; preds = %cond_next13
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NOT: @bar
|
||||
define internal fastcc void @bar(i32 %X) {
|
||||
entry:
|
||||
%ALL = alloca i32, align 4 ; <i32*> [#uses=1]
|
||||
%tmp1 = and i32 %X, 1 ; <i32> [#uses=1]
|
||||
%tmp1.upgrd.1 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
|
||||
br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true
|
||||
|
||||
cond_true: ; preds = %entry
|
||||
store i32 1, i32* @A
|
||||
br label %cond_next
|
||||
|
||||
cond_next: ; preds = %cond_true, %entry
|
||||
%tmp4 = and i32 %X, 2 ; <i32> [#uses=1]
|
||||
%tmp4.upgrd.2 = icmp eq i32 %tmp4, 0 ; <i1> [#uses=1]
|
||||
br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5
|
||||
|
||||
cond_true5: ; preds = %cond_next
|
||||
store i32 1, i32* @B
|
||||
br label %cond_next7
|
||||
|
||||
cond_next7: ; preds = %cond_true5, %cond_next
|
||||
%tmp10 = and i32 %X, 4 ; <i32> [#uses=1]
|
||||
%tmp10.upgrd.3 = icmp eq i32 %tmp10, 0 ; <i1> [#uses=1]
|
||||
br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11
|
||||
|
||||
cond_true11: ; preds = %cond_next7
|
||||
store i32 1, i32* @C
|
||||
br label %cond_next13
|
||||
|
||||
cond_next13: ; preds = %cond_true11, %cond_next7
|
||||
%tmp16 = and i32 %X, 8 ; <i32> [#uses=1]
|
||||
%tmp16.upgrd.4 = icmp eq i32 %tmp16, 0 ; <i1> [#uses=1]
|
||||
br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17
|
||||
|
||||
cond_true17: ; preds = %cond_next13
|
||||
call void @foo( i32 %X )
|
||||
ret void
|
||||
|
||||
UnifiedReturnBlock: ; preds = %cond_next13
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal fastcc void @bar2(i32 %X) {
|
||||
entry:
|
||||
call void @foo( i32 %X )
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @ext(i32*)
|
||||
|
||||
define void @test(i32 %X) {
|
||||
entry:
|
||||
; CHECK: test
|
||||
; CHECK-NOT: @bar
|
||||
tail call fastcc void @bar( i32 %X )
|
||||
tail call fastcc void @bar( i32 %X )
|
||||
tail call fastcc void @bar2( i32 %X )
|
||||
tail call fastcc void @bar2( i32 %X )
|
||||
ret void
|
||||
; CHECK: ret
|
||||
}
|
Loading…
Reference in New Issue
Block a user