implement a nice little efficiency hack in the inliner. Since we're now

running IPSCCP early, and we run functionattrs interlaced with the inliner,
we often (particularly for small or noop functions) completely propagate
all of the information about a call to its call site in IPSCCP (making a call
dead) and functionattrs is smart enough to realize that the function is
readonly (because it is interlaced with the inliner).

To improve compile time and make the inliner threshold more accurate, realize
that we don't have to inline dead readonly function calls.  Instead, just
delete the call.  This happens all the time for C++ code; here are some
counters from opt/llvm-ld counting the number of times calls were deleted vs.
inlined on various apps:

Tramp3d opt:
  5033 inline                - Number of call sites deleted, not inlined
 24596 inline                - Number of functions inlined
llvm-ld:
  667 inline           - Number of functions deleted because all callers found
  699 inline           - Number of functions inlined

483.xalancbmk opt:
  8096 inline                - Number of call sites deleted, not inlined
 62528 inline                - Number of functions inlined
llvm-ld:
   217 inline           - Number of allocas merged together
  2158 inline           - Number of functions inlined

471.omnetpp:
  331 inline                - Number of call sites deleted, not inlined
 8981 inline                - Number of functions inlined
llvm-ld:
  171 inline           - Number of functions deleted because all callers found
  629 inline           - Number of functions inlined


Deleting a call is much faster than inlining it, and is insensitive to the
size of the callee. :)



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@86975 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2009-11-12 07:56:08 +00:00
parent 60f9061820
commit dbab4dc942
2 changed files with 54 additions and 16 deletions

View File

@ -32,6 +32,7 @@
using namespace llvm;
STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");
@ -336,23 +337,39 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
CallSite CS = CallSites[CSi];
Function *Callee = CS.getCalledFunction();
// We can only inline direct calls to non-declarations.
if (Callee == 0 || Callee->isDeclaration()) continue;
// If the policy determines that we should inline this function,
// try to do so.
if (!shouldInline(CS))
continue;
Function *Caller = CS.getCaller();
// Attempt to inline the function...
if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
continue;
Function *Callee = CS.getCalledFunction();
// If this call site is dead and it is to a readonly function, we should
// just delete the call instead of trying to inline it, regardless of
// size. This happens because IPSCCP propagates the result out of the
// call and then we're left with the dead call.
if (CS.getInstruction()->use_empty() &&
!CS.getInstruction()->mayHaveSideEffects()) {
DEBUG(errs() << " -> Deleting dead call: "
<< *CS.getInstruction() << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
CG[Caller]->removeCallEdgeFor(CS);
CS.getInstruction()->eraseFromParent();
++NumCallsDeleted;
} else {
// We can only inline direct calls to non-declarations.
if (Callee == 0 || Callee->isDeclaration()) continue;
// If we inlined the last possible call site to the function, delete the
// function body now.
if (Callee->use_empty() && Callee->hasLocalLinkage() &&
// If the policy determines that we should inline this function,
// try to do so.
if (!shouldInline(CS))
continue;
// Attempt to inline the function...
if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
continue;
++NumInlined;
}
// If we inlined or deleted the last possible call site to the function,
// delete the function body now.
if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
// TODO: Can remove if in SCC now.
!SCCFunctions.count(Callee) &&
@ -391,7 +408,6 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
}
--CSi;
++NumInlined;
Changed = true;
LocalChange = true;
}

View File

@ -0,0 +1,22 @@
; RUN: opt %s -S -inline -functionattrs -stats |& grep {Number of call sites deleted, not inlined}
; RUN: opt %s -S -inline -stats |& grep {Number of functions inlined}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
target triple = "i386-apple-darwin9.8"
; Callee for the dead-call test: returns (%y + %z) * %x + %y * %z.
; The body contains only integer arithmetic and a return (no loads, stores,
; or calls), and the function has internal linkage.
define internal i32 @test(i32 %x, i32 %y, i32 %z) nounwind {
entry:
%0 = add nsw i32 %y, %z ; <i32> [#uses=1]
%1 = mul i32 %0, %x ; <i32> [#uses=1]
%2 = mul i32 %y, %z ; <i32> [#uses=1]
%3 = add nsw i32 %1, %2 ; <i32> [#uses=1]
ret i32 %3
}
; Caller containing a dead call: the result %0 of the call to @test is never
; used, because the function already returns the constant 14, which equals
; @test(1, 2, 4) = (2 + 4) * 1 + 2 * 4. This is the shape of code the RUN
; lines above exercise (call deleted vs. call inlined).
define i32 @test2() nounwind {
entry:
%0 = call i32 @test(i32 1, i32 2, i32 4) nounwind ; <i32> [#uses=0]
ret i32 14
}