When considering whether to inline Callee into Caller,

and that will make Caller too big to inline, see if it might be better to inline Caller into its callers instead. This situation is described in PR 2973, although I haven't tried the specific case in SPASS. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@83602 91177308-0d34-0410-b5e6-96231b3b80d8
2024-11-01 15:11:24 +00:00 · 2009-10-09 00:11:32 +00:00 · 2009-10-09 00:11:32 +00:00 · e91b9a3b59
commit e91b9a3b59
parent 3645b01002
2 changed files with 181 additions and 6 deletions
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@ -189,9 +189,9 @@ bool Inliner::shouldInline(CallSite CS) {
  
  int Cost = IC.getValue();
  int CurrentThreshold = InlineThreshold;
-  Function *Fn = CS.getCaller();
-  if (Fn && !Fn->isDeclaration() &&
-      Fn->hasFnAttr(Attribute::OptimizeForSize) &&
+  Function *Caller = CS.getCaller();
+  if (Caller && !Caller->isDeclaration() &&
+      Caller->hasFnAttr(Attribute::OptimizeForSize) &&
      InlineLimit.getNumOccurrences() == 0 &&
      InlineThreshold != 50)
    CurrentThreshold = 50;
@ -203,8 +203,72 @@ bool Inliner::shouldInline(CallSite CS) {
    return false;
  }
  
+  // Try to detect the case where the current inlining candidate caller
+  // (call it B) is a static function and is an inlining candidate elsewhere,
+  // and the current candidate callee (call it C) is large enough that
+  // inlining it into B would make B too big to inline later.  In these
+  // circumstances it may be best not to inline C into B, but to inline B
+  // into its callers.
+  if (Caller->hasLocalLinkage()) {
+    int TotalSecondaryCost = 0;
+    bool outerCallsFound = false;
+    bool allOuterCallsWillBeInlined = true;
+    bool someOuterCallWouldNotBeInlined = false;
+    for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); 
+         I != E; ++I) {
+      CallSite CS2 = CallSite::get(*I);
+
+      // If this isn't a call to Caller (it could be some other sort
+      // of reference) skip it.
+      if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller)
+        continue;
+
+      InlineCost IC2 = getInlineCost(CS2);
+      if (IC2.isNever())
+        allOuterCallsWillBeInlined = false;
+      if (IC2.isAlways() || IC2.isNever())
+        continue;
+
+      outerCallsFound = true;
+      int Cost2 = IC2.getValue();
+      int CurrentThreshold2 = InlineThreshold;
+      Function *Caller2 = CS2.getCaller();
+      if (Caller2 && !Caller2->isDeclaration() &&
+          Caller2->hasFnAttr(Attribute::OptimizeForSize) &&
+          InlineThreshold != 50)
+        CurrentThreshold2 = 50;
+
+      float FudgeFactor2 = getInlineFudgeFactor(CS2);
+
+      if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
+        allOuterCallsWillBeInlined = false;
+
+      // See if we have this case.  The magic 6 is what InlineCost assigns
+      // for the call instruction, which we would be deleting.
+      if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
+          Cost2 + Cost - 6 >= (int)(CurrentThreshold2 * FudgeFactor2)) {
+        someOuterCallWouldNotBeInlined = true;
+        TotalSecondaryCost += Cost2;
+      }
+    }
+    // If all outer calls to Caller would get inlined, the cost for the last
+    // one is set very low by getInlineCost, in anticipation that Caller will
+    // be removed entirely.  We did not account for this above unless there
+    // is only one caller of Caller.
+    if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end())
+      TotalSecondaryCost -= 15000;
+
+    if (outerCallsFound && someOuterCallWouldNotBeInlined && 
+        TotalSecondaryCost < Cost) {
+      DEBUG(errs() << "    NOT Inlining: " << *CS.getInstruction() << 
+           " Cost = " << Cost << 
+           ", outer Cost = " << TotalSecondaryCost << '\n');
+      return false;
+    }
+  }
+
  DEBUG(errs() << "    Inlining: cost=" << Cost
-        << ", Call: " << *CS.getInstruction() << "\n");
+        << ", Call: " << *CS.getInstruction() << '\n');
  return true;
 }

@ -232,7 +296,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
        CallSite CS = CallSite::get(I);
-        // If this this isn't a call, or it is a call to an intrinsic, it can
+        // If this isn't a call, or it is a call to an intrinsic, it can
        // never be inlined.
        if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
          continue;
@ -279,7 +343,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
      // try to do so.
      if (!shouldInline(CS))
        continue;
-      
+
      Function *Caller = CS.getCaller();
      // Attempt to inline the function...
      if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
--- a/test/Transforms/Inline/nested-inline.ll
+++ b/test/Transforms/Inline/nested-inline.ll
@ -0,0 +1,111 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+; Test that bar and bar2 are both inlined throughout and removed.
+@A = weak global i32 0		; <i32*> [#uses=1]
+@B = weak global i32 0		; <i32*> [#uses=1]
+@C = weak global i32 0		; <i32*> [#uses=1]
+
+define fastcc void @foo(i32 %X) {
+entry:
+; CHECK: @foo
+	%ALL = alloca i32, align 4		; <i32*> [#uses=1]
+	%tmp1 = and i32 %X, 1		; <i32> [#uses=1]
+	%tmp1.upgrd.1 = icmp eq i32 %tmp1, 0		; <i1> [#uses=1]
+	br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true
+
+cond_true:		; preds = %entry
+	store i32 1, i32* @A
+	br label %cond_next
+
+cond_next:		; preds = %cond_true, %entry
+	%tmp4 = and i32 %X, 2		; <i32> [#uses=1]
+	%tmp4.upgrd.2 = icmp eq i32 %tmp4, 0		; <i1> [#uses=1]
+	br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5
+
+cond_true5:		; preds = %cond_next
+	store i32 1, i32* @B
+	br label %cond_next7
+
+cond_next7:		; preds = %cond_true5, %cond_next
+	%tmp10 = and i32 %X, 4		; <i32> [#uses=1]
+	%tmp10.upgrd.3 = icmp eq i32 %tmp10, 0		; <i1> [#uses=1]
+	br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11
+
+cond_true11:		; preds = %cond_next7
+	store i32 1, i32* @C
+	br label %cond_next13
+
+cond_next13:		; preds = %cond_true11, %cond_next7
+	%tmp16 = and i32 %X, 8		; <i32> [#uses=1]
+	%tmp16.upgrd.4 = icmp eq i32 %tmp16, 0		; <i1> [#uses=1]
+	br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17
+
+cond_true17:		; preds = %cond_next13
+	call void @ext( i32* %ALL )
+	ret void
+
+UnifiedReturnBlock:		; preds = %cond_next13
+	ret void
+}
+
+; CHECK-NOT: @bar
+define internal fastcc void @bar(i32 %X) {
+entry:
+	%ALL = alloca i32, align 4		; <i32*> [#uses=1]
+	%tmp1 = and i32 %X, 1		; <i32> [#uses=1]
+	%tmp1.upgrd.1 = icmp eq i32 %tmp1, 0		; <i1> [#uses=1]
+	br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true
+
+cond_true:		; preds = %entry
+	store i32 1, i32* @A
+	br label %cond_next
+
+cond_next:		; preds = %cond_true, %entry
+	%tmp4 = and i32 %X, 2		; <i32> [#uses=1]
+	%tmp4.upgrd.2 = icmp eq i32 %tmp4, 0		; <i1> [#uses=1]
+	br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5
+
+cond_true5:		; preds = %cond_next
+	store i32 1, i32* @B
+	br label %cond_next7
+
+cond_next7:		; preds = %cond_true5, %cond_next
+	%tmp10 = and i32 %X, 4		; <i32> [#uses=1]
+	%tmp10.upgrd.3 = icmp eq i32 %tmp10, 0		; <i1> [#uses=1]
+	br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11
+
+cond_true11:		; preds = %cond_next7
+	store i32 1, i32* @C
+	br label %cond_next13
+
+cond_next13:		; preds = %cond_true11, %cond_next7
+	%tmp16 = and i32 %X, 8		; <i32> [#uses=1]
+	%tmp16.upgrd.4 = icmp eq i32 %tmp16, 0		; <i1> [#uses=1]
+	br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17
+
+cond_true17:		; preds = %cond_next13
+	call void @foo( i32 %X )
+	ret void
+
+UnifiedReturnBlock:		; preds = %cond_next13
+	ret void
+}
+
+define internal fastcc void @bar2(i32 %X) {
+entry:
+	call void @foo( i32 %X )
+	ret void
+}
+
+declare void @ext(i32*)
+
+define void @test(i32 %X) {
+entry:
+; CHECK: test
+; CHECK-NOT: @bar
+	tail call fastcc void @bar( i32 %X )
+	tail call fastcc void @bar( i32 %X )
+	tail call fastcc void @bar2( i32 %X )
+	tail call fastcc void @bar2( i32 %X )
+	ret void
+; CHECK: ret
+}