When considering whether to inline Callee into Caller,

and that will make Caller too big to inline, see if it
might be better to inline Caller into its callers instead.
This situation is described in PR 2973, although I haven't
tried the specific case in SPASS.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@83602 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dale Johannesen 2009-10-09 00:11:32 +00:00
parent 3645b01002
commit e91b9a3b59
2 changed files with 181 additions and 6 deletions

View File

@ -189,9 +189,9 @@ bool Inliner::shouldInline(CallSite CS) {
int Cost = IC.getValue();
int CurrentThreshold = InlineThreshold;
Function *Fn = CS.getCaller();
if (Fn && !Fn->isDeclaration() &&
Fn->hasFnAttr(Attribute::OptimizeForSize) &&
Function *Caller = CS.getCaller();
if (Caller && !Caller->isDeclaration() &&
Caller->hasFnAttr(Attribute::OptimizeForSize) &&
InlineLimit.getNumOccurrences() == 0 &&
InlineThreshold != 50)
CurrentThreshold = 50;
@ -203,8 +203,72 @@ bool Inliner::shouldInline(CallSite CS) {
return false;
}
// Try to detect the case where the current inlining candidate caller
// (call it B) is a static function and is an inlining candidate elsewhere,
// and the current candidate callee (call it C) is large enough that
// inlining it into B would make B too big to inline later. In these
// circumstances it may be best not to inline C into B, but to inline B
// into its callers.
if (Caller->hasLocalLinkage()) {
int TotalSecondaryCost = 0;
bool outerCallsFound = false;
bool allOuterCallsWillBeInlined = true;
bool someOuterCallWouldNotBeInlined = false;
for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
I != E; ++I) {
CallSite CS2 = CallSite::get(*I);
// If this isn't a call to Caller (it could be some other sort
// of reference) skip it.
if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller)
continue;
InlineCost IC2 = getInlineCost(CS2);
if (IC2.isNever())
allOuterCallsWillBeInlined = false;
if (IC2.isAlways() || IC2.isNever())
continue;
outerCallsFound = true;
int Cost2 = IC2.getValue();
int CurrentThreshold2 = InlineThreshold;
Function *Caller2 = CS2.getCaller();
if (Caller2 && !Caller2->isDeclaration() &&
Caller2->hasFnAttr(Attribute::OptimizeForSize) &&
InlineThreshold != 50)
CurrentThreshold2 = 50;
float FudgeFactor2 = getInlineFudgeFactor(CS2);
if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
allOuterCallsWillBeInlined = false;
// See if we have this case. The magic 6 is what InlineCost assigns
// for the call instruction, which we would be deleting.
if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
Cost2 + Cost - 6 >= (int)(CurrentThreshold2 * FudgeFactor2)) {
someOuterCallWouldNotBeInlined = true;
TotalSecondaryCost += Cost2;
}
}
// If all outer calls to Caller would get inlined, the cost for the last
// one is set very low by getInlineCost, in anticipation that Caller will
// be removed entirely. We did not account for this above unless there
// is only one caller of Caller.
if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end())
TotalSecondaryCost -= 15000;
if (outerCallsFound && someOuterCallWouldNotBeInlined &&
TotalSecondaryCost < Cost) {
DEBUG(errs() << " NOT Inlining: " << *CS.getInstruction() <<
" Cost = " << Cost <<
", outer Cost = " << TotalSecondaryCost << '\n');
return false;
}
}
DEBUG(errs() << " Inlining: cost=" << Cost
<< ", Call: " << *CS.getInstruction() << "\n");
<< ", Call: " << *CS.getInstruction() << '\n');
return true;
}
@ -232,7 +296,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
CallSite CS = CallSite::get(I);
// If this this isn't a call, or it is a call to an intrinsic, it can
// If this isn't a call, or it is a call to an intrinsic, it can
// never be inlined.
if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
continue;
@ -279,7 +343,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
// try to do so.
if (!shouldInline(CS))
continue;
Function *Caller = CS.getCaller();
// Attempt to inline the function...
if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))

View File

@ -0,0 +1,111 @@
; RUN: opt < %s -inline -S | FileCheck %s
; Test that bar and bar2 are both inlined throughout and removed.
@A = weak global i32 0 ; <i32*> [#uses=1]
@B = weak global i32 0 ; <i32*> [#uses=1]
@C = weak global i32 0 ; <i32*> [#uses=1]
define fastcc void @foo(i32 %X) {
entry:
; CHECK: @foo
%ALL = alloca i32, align 4 ; <i32*> [#uses=1]
%tmp1 = and i32 %X, 1 ; <i32> [#uses=1]
%tmp1.upgrd.1 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true
cond_true: ; preds = %entry
store i32 1, i32* @A
br label %cond_next
cond_next: ; preds = %cond_true, %entry
%tmp4 = and i32 %X, 2 ; <i32> [#uses=1]
%tmp4.upgrd.2 = icmp eq i32 %tmp4, 0 ; <i1> [#uses=1]
br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5
cond_true5: ; preds = %cond_next
store i32 1, i32* @B
br label %cond_next7
cond_next7: ; preds = %cond_true5, %cond_next
%tmp10 = and i32 %X, 4 ; <i32> [#uses=1]
%tmp10.upgrd.3 = icmp eq i32 %tmp10, 0 ; <i1> [#uses=1]
br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11
cond_true11: ; preds = %cond_next7
store i32 1, i32* @C
br label %cond_next13
cond_next13: ; preds = %cond_true11, %cond_next7
%tmp16 = and i32 %X, 8 ; <i32> [#uses=1]
%tmp16.upgrd.4 = icmp eq i32 %tmp16, 0 ; <i1> [#uses=1]
br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17
cond_true17: ; preds = %cond_next13
call void @ext( i32* %ALL )
ret void
UnifiedReturnBlock: ; preds = %cond_next13
ret void
}
; CHECK-NOT: @bar
define internal fastcc void @bar(i32 %X) {
entry:
%ALL = alloca i32, align 4 ; <i32*> [#uses=1]
%tmp1 = and i32 %X, 1 ; <i32> [#uses=1]
%tmp1.upgrd.1 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true
cond_true: ; preds = %entry
store i32 1, i32* @A
br label %cond_next
cond_next: ; preds = %cond_true, %entry
%tmp4 = and i32 %X, 2 ; <i32> [#uses=1]
%tmp4.upgrd.2 = icmp eq i32 %tmp4, 0 ; <i1> [#uses=1]
br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5
cond_true5: ; preds = %cond_next
store i32 1, i32* @B
br label %cond_next7
cond_next7: ; preds = %cond_true5, %cond_next
%tmp10 = and i32 %X, 4 ; <i32> [#uses=1]
%tmp10.upgrd.3 = icmp eq i32 %tmp10, 0 ; <i1> [#uses=1]
br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11
cond_true11: ; preds = %cond_next7
store i32 1, i32* @C
br label %cond_next13
cond_next13: ; preds = %cond_true11, %cond_next7
%tmp16 = and i32 %X, 8 ; <i32> [#uses=1]
%tmp16.upgrd.4 = icmp eq i32 %tmp16, 0 ; <i1> [#uses=1]
br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17
cond_true17: ; preds = %cond_next13
call void @foo( i32 %X )
ret void
UnifiedReturnBlock: ; preds = %cond_next13
ret void
}
define internal fastcc void @bar2(i32 %X) {
entry:
call void @foo( i32 %X )
ret void
}
declare void @ext(i32*)
define void @test(i32 %X) {
entry:
; CHECK: test
; CHECK-NOT: @bar
tail call fastcc void @bar( i32 %X )
tail call fastcc void @bar( i32 %X )
tail call fastcc void @bar2( i32 %X )
tail call fastcc void @bar2( i32 %X )
ret void
; CHECK: ret
}