mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-19 04:32:19 +00:00
fix PR5009 by making CGSCCPM realize that a call was devirtualized
if an indirect call site was removed and a direct one was added, not just if an indirect call site was modified to be direct. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@102830 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1951e108be
commit
bccb41afc8
@ -191,6 +191,10 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
|
||||
|
||||
// Walk the function body looking for call sites. Sync up the call sites in
|
||||
// CGN with those actually in the function.
|
||||
|
||||
// Keep track of the number of direct and indirect calls that were
|
||||
// invalidated and removed.
|
||||
unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0;
|
||||
|
||||
// Get the set of call sites currently in the function.
|
||||
for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
|
||||
@ -209,6 +213,12 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
|
||||
assert(!CheckingMode &&
|
||||
"CallGraphSCCPass did not update the CallGraph correctly!");
|
||||
|
||||
// If this was an indirect call site, count it.
|
||||
if (I->second->getFunction() == 0)
|
||||
++NumIndirectRemoved;
|
||||
else
|
||||
++NumDirectRemoved;
|
||||
|
||||
// Just remove the edge from the set of callees, keep track of whether
|
||||
// I points to the last element of the vector.
|
||||
bool WasLast = I + 1 == E;
|
||||
@ -230,6 +240,9 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
|
||||
}
|
||||
|
||||
// Loop over all of the instructions in the function, getting the callsites.
|
||||
// Keep track of the number of direct/indirect calls added.
|
||||
unsigned NumDirectAdded = 0, NumIndirectAdded = 0;
|
||||
|
||||
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
|
||||
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
|
||||
CallSite CS = CallSite::get(I);
|
||||
@ -286,19 +299,34 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
|
||||
assert(!CheckingMode &&
|
||||
"CallGraphSCCPass did not update the CallGraph correctly!");
|
||||
|
||||
// If the call site didn't exist in the CGN yet, add it. We assume that
|
||||
// newly introduced call sites won't be indirect. This could be fixed
|
||||
// in the future.
|
||||
// If the call site didn't exist in the CGN yet, add it.
|
||||
CallGraphNode *CalleeNode;
|
||||
if (Function *Callee = CS.getCalledFunction())
|
||||
if (Function *Callee = CS.getCalledFunction()) {
|
||||
CalleeNode = CG.getOrInsertFunction(Callee);
|
||||
else
|
||||
++NumDirectAdded;
|
||||
} else {
|
||||
CalleeNode = CG.getCallsExternalNode();
|
||||
++NumIndirectAdded;
|
||||
}
|
||||
|
||||
CGN->addCalledFunction(CS, CalleeNode);
|
||||
MadeChange = true;
|
||||
}
|
||||
|
||||
// We scanned the old callgraph node, removing invalidated call sites and
|
||||
// then added back newly found call sites. One thing that can happen is
|
||||
// that an old indirect call site was deleted and replaced with a new direct
|
||||
// call. In this case, we have devirtualized a call, and CGSCCPM would like
|
||||
// to iteratively optimize the new code. Unfortunately, we don't really
|
||||
// have a great way to detect when this happens. As an approximation, we
|
||||
// just look at whether the number of indirect calls is reduced and the
|
||||
// number of direct calls is increased. There are tons of ways to fool this
|
||||
// (e.g. DCE'ing an indirect call and duplicating an unrelated block with a
|
||||
// direct call) but this is close enough.
|
||||
if (NumIndirectRemoved > NumIndirectAdded &&
|
||||
NumDirectRemoved < NumDirectAdded)
|
||||
DevirtualizedCall = true;
|
||||
|
||||
// After scanning this function, if we still have entries in callsites, then
|
||||
// they are dangling pointers. WeakVH should save us for this, so abort if
|
||||
// this happens.
|
||||
@ -315,6 +343,9 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
|
||||
for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
|
||||
I != E; ++I)
|
||||
(*I)->dump();
|
||||
if (DevirtualizedCall)
|
||||
dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n";
|
||||
|
||||
} else {
|
||||
dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
|
||||
}
|
||||
|
79
test/Transforms/Inline/devirtualize-3.ll
Normal file
79
test/Transforms/Inline/devirtualize-3.ll
Normal file
@ -0,0 +1,79 @@
|
||||
; RUN: opt -inline -S -scalarrepl -gvn -instcombine %s | FileCheck %s
|
||||
; PR5009
|
||||
|
||||
; CHECK: define i32 @main()
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: call void @exit(i32 38)
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-darwin10.0.0"
|
||||
|
||||
%struct.cont_t = type { void (i8*, i32)*, i8* }
|
||||
%struct.foo_sf_t = type { %struct.cont_t*, i32 }
|
||||
|
||||
define i32 @main() nounwind ssp {
|
||||
entry:
|
||||
%cont = alloca %struct.cont_t, align 8 ; <%struct.cont_t*> [#uses=4]
|
||||
%tmp = getelementptr inbounds %struct.cont_t* %cont, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
|
||||
%tmp1 = getelementptr inbounds %struct.cont_t* %cont, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=2]
|
||||
store void (i8*, i32)* bitcast (void (%struct.cont_t*, i32)* @quit to void (i8*, i32)*), void (i8*, i32)** %tmp1
|
||||
%tmp2 = load void (i8*, i32)** %tmp1 ; <void (i8*, i32)*> [#uses=1]
|
||||
store void (i8*, i32)* %tmp2, void (i8*, i32)** %tmp
|
||||
%tmp3 = getelementptr inbounds %struct.cont_t* %cont, i32 0, i32 1 ; <i8**> [#uses=1]
|
||||
store i8* null, i8** %tmp3
|
||||
call void @foo(%struct.cont_t* %cont)
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define internal void @quit(%struct.cont_t* %cont, i32 %rcode) nounwind ssp {
|
||||
entry:
|
||||
call void @exit(i32 %rcode) noreturn
|
||||
unreachable
|
||||
}
|
||||
|
||||
define internal void @foo(%struct.cont_t* %c) nounwind ssp {
|
||||
entry:
|
||||
%sf = alloca %struct.foo_sf_t, align 8 ; <%struct.foo_sf_t*> [#uses=3]
|
||||
%next = alloca %struct.cont_t, align 8 ; <%struct.cont_t*> [#uses=3]
|
||||
%tmp = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1]
|
||||
store %struct.cont_t* %c, %struct.cont_t** %tmp
|
||||
%tmp2 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 1 ; <i32*> [#uses=1]
|
||||
store i32 2, i32* %tmp2
|
||||
%tmp4 = getelementptr inbounds %struct.cont_t* %next, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
|
||||
store void (i8*, i32)* bitcast (void (%struct.foo_sf_t*, i32)* @foo2 to void (i8*, i32)*), void (i8*, i32)** %tmp4
|
||||
%tmp5 = getelementptr inbounds %struct.cont_t* %next, i32 0, i32 1 ; <i8**> [#uses=1]
|
||||
%conv = bitcast %struct.foo_sf_t* %sf to i8* ; <i8*> [#uses=1]
|
||||
store i8* %conv, i8** %tmp5
|
||||
call void @bar(%struct.cont_t* %next, i32 14)
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal void @foo2(%struct.foo_sf_t* %sf, i32 %y) nounwind ssp {
|
||||
entry:
|
||||
%tmp1 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1]
|
||||
%tmp2 = load %struct.cont_t** %tmp1 ; <%struct.cont_t*> [#uses=1]
|
||||
%tmp3 = getelementptr inbounds %struct.cont_t* %tmp2, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
|
||||
%tmp4 = load void (i8*, i32)** %tmp3 ; <void (i8*, i32)*> [#uses=1]
|
||||
%tmp6 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1]
|
||||
%tmp7 = load %struct.cont_t** %tmp6 ; <%struct.cont_t*> [#uses=1]
|
||||
%conv = bitcast %struct.cont_t* %tmp7 to i8* ; <i8*> [#uses=1]
|
||||
%tmp9 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 1 ; <i32*> [#uses=1]
|
||||
%tmp10 = load i32* %tmp9 ; <i32> [#uses=1]
|
||||
%mul = mul i32 %tmp10, %y ; <i32> [#uses=1]
|
||||
call void %tmp4(i8* %conv, i32 %mul)
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal void @bar(%struct.cont_t* %c, i32 %y) nounwind ssp {
|
||||
entry:
|
||||
%tmp1 = getelementptr inbounds %struct.cont_t* %c, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
|
||||
%tmp2 = load void (i8*, i32)** %tmp1 ; <void (i8*, i32)*> [#uses=1]
|
||||
%tmp4 = getelementptr inbounds %struct.cont_t* %c, i32 0, i32 1 ; <i8**> [#uses=1]
|
||||
%tmp5 = load i8** %tmp4 ; <i8*> [#uses=1]
|
||||
%add = add nsw i32 %y, 5 ; <i32> [#uses=1]
|
||||
call void %tmp2(i8* %tmp5, i32 %add)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @exit(i32) noreturn
|
||||
|
Loading…
x
Reference in New Issue
Block a user