From bccb41afc8542f01358341451cac85a7115d8763 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sat, 1 May 2010 06:38:43 +0000 Subject: [PATCH] fix PR5009 by making CGSCCPM realize that a call was devirtualized if an indirect call site was removed and a direct one was added, not just if an indirect call site was modified to be direct. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@102830 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/IPA/CallGraphSCCPass.cpp | 41 ++++++++++-- test/Transforms/Inline/devirtualize-3.ll | 79 ++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 5 deletions(-) create mode 100644 test/Transforms/Inline/devirtualize-3.ll diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 288a7ef1583..0c01ee5b828 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -191,6 +191,10 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, // Walk the function body looking for call sites. Sync up the call sites in // CGN with those actually in the function. + + // Keep track of the number of direct and indirect calls that were + // invalidated and removed. + unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0; // Get the set of call sites currently in the function. for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { @@ -209,6 +213,12 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); + // If this was an indirect call site, count it. + if (I->second->getFunction() == 0) + ++NumIndirectRemoved; + else + ++NumDirectRemoved; + // Just remove the edge from the set of callees, keep track of whether // I points to the last element of the vector. bool WasLast = I + 1 == E; @@ -230,6 +240,9 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, } // Loop over all of the instructions in the function, getting the callsites. + // Keep track of the number of direct/indirect calls added. + unsigned NumDirectAdded = 0, NumIndirectAdded = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { CallSite CS = CallSite::get(I); @@ -286,19 +299,34 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); - // If the call site didn't exist in the CGN yet, add it. We assume that - // newly introduced call sites won't be indirect. This could be fixed - // in the future. + // If the call site didn't exist in the CGN yet, add it. CallGraphNode *CalleeNode; - if (Function *Callee = CS.getCalledFunction()) + if (Function *Callee = CS.getCalledFunction()) { CalleeNode = CG.getOrInsertFunction(Callee); - else + ++NumDirectAdded; + } else { CalleeNode = CG.getCallsExternalNode(); + ++NumIndirectAdded; + } CGN->addCalledFunction(CS, CalleeNode); MadeChange = true; } + // We scanned the old callgraph node, removing invalidated call sites and + // then added back newly found call sites. One thing that can happen is + // that an old indirect call site was deleted and replaced with a new direct + // call. In this case, we have devirtualized a call, and CGSCCPM would like + // to iteratively optimize the new code. Unfortunately, we don't really + // have a great way to detect when this happens. As an approximation, we + // just look at whether the number of indirect calls is reduced and the + // number of direct calls is increased. There are tons of ways to fool this + // (e.g. DCE'ing an indirect call and duplicating an unrelated block with a + // direct call) but this is close enough. + if (NumIndirectRemoved > NumIndirectAdded && + NumDirectRemoved < NumDirectAdded) + DevirtualizedCall = true; + // After scanning this function, if we still have entries in callsites, then // they are dangling pointers. WeakVH should save us for this, so abort if // this happens. @@ -315,6 +343,9 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); I != E; ++I) (*I)->dump(); + if (DevirtualizedCall) + dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n"; + } else { dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n"; } diff --git a/test/Transforms/Inline/devirtualize-3.ll b/test/Transforms/Inline/devirtualize-3.ll new file mode 100644 index 00000000000..0a50786498d --- /dev/null +++ b/test/Transforms/Inline/devirtualize-3.ll @@ -0,0 +1,79 @@ +; RUN: opt -inline -S -scalarrepl -gvn -instcombine %s | FileCheck %s +; PR5009 + +; CHECK: define i32 @main() +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @exit(i32 38) + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%struct.cont_t = type { void (i8*, i32)*, i8* } +%struct.foo_sf_t = type { %struct.cont_t*, i32 } + +define i32 @main() nounwind ssp { +entry: + %cont = alloca %struct.cont_t, align 8 ; <%struct.cont_t*> [#uses=4] + %tmp = getelementptr inbounds %struct.cont_t* %cont, i32 0, i32 0 ; [#uses=1] + %tmp1 = getelementptr inbounds %struct.cont_t* %cont, i32 0, i32 0 ; [#uses=2] + store void (i8*, i32)* bitcast (void (%struct.cont_t*, i32)* @quit to void (i8*, i32)*), void (i8*, i32)** %tmp1 + %tmp2 = load void (i8*, i32)** %tmp1 ; [#uses=1] + store void (i8*, i32)* %tmp2, void (i8*, i32)** %tmp + %tmp3 = getelementptr inbounds %struct.cont_t* %cont, i32 0, i32 1 ; [#uses=1] + store i8* null, i8** %tmp3 + call void @foo(%struct.cont_t* %cont) + ret i32 0 +} + +define internal void @quit(%struct.cont_t* %cont, i32 %rcode) nounwind ssp { +entry: + call void @exit(i32 %rcode) noreturn + unreachable +} + +define internal void @foo(%struct.cont_t* %c) nounwind ssp { +entry: + %sf = alloca %struct.foo_sf_t, align 8 ; <%struct.foo_sf_t*> [#uses=3] + %next = alloca %struct.cont_t, align 8 ; <%struct.cont_t*> [#uses=3] + %tmp = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1] + store %struct.cont_t* %c, %struct.cont_t** %tmp + %tmp2 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 1 ; [#uses=1] + store i32 2, i32* %tmp2 + %tmp4 = getelementptr inbounds %struct.cont_t* %next, i32 0, i32 0 ; [#uses=1] + store void (i8*, i32)* bitcast (void (%struct.foo_sf_t*, i32)* @foo2 to void (i8*, i32)*), void (i8*, i32)** %tmp4 + %tmp5 = getelementptr inbounds %struct.cont_t* %next, i32 0, i32 1 ; [#uses=1] + %conv = bitcast %struct.foo_sf_t* %sf to i8* ; [#uses=1] + store i8* %conv, i8** %tmp5 + call void @bar(%struct.cont_t* %next, i32 14) + ret void +} + +define internal void @foo2(%struct.foo_sf_t* %sf, i32 %y) nounwind ssp { +entry: + %tmp1 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1] + %tmp2 = load %struct.cont_t** %tmp1 ; <%struct.cont_t*> [#uses=1] + %tmp3 = getelementptr inbounds %struct.cont_t* %tmp2, i32 0, i32 0 ; [#uses=1] + %tmp4 = load void (i8*, i32)** %tmp3 ; [#uses=1] + %tmp6 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1] + %tmp7 = load %struct.cont_t** %tmp6 ; <%struct.cont_t*> [#uses=1] + %conv = bitcast %struct.cont_t* %tmp7 to i8* ; [#uses=1] + %tmp9 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 1 ; [#uses=1] + %tmp10 = load i32* %tmp9 ; [#uses=1] + %mul = mul i32 %tmp10, %y ; [#uses=1] + call void %tmp4(i8* %conv, i32 %mul) + ret void +} + +define internal void @bar(%struct.cont_t* %c, i32 %y) nounwind ssp { +entry: + %tmp1 = getelementptr inbounds %struct.cont_t* %c, i32 0, i32 0 ; [#uses=1] + %tmp2 = load void (i8*, i32)** %tmp1 ; [#uses=1] + %tmp4 = getelementptr inbounds %struct.cont_t* %c, i32 0, i32 1 ; [#uses=1] + %tmp5 = load i8** %tmp4 ; [#uses=1] + %add = add nsw i32 %y, 5 ; [#uses=1] + call void %tmp2(i8* %tmp5, i32 %add) + ret void +} + +declare void @exit(i32) noreturn +