mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-29 13:32:33 +00:00
Improve 'tail' call marking in TRE. A bootstrap of clang goes from 375k calls marked tail in the IR to 470k, however this improvement does not carry into an improvement of the call/jmp ratio on x86. The most common pattern is a tail call + br to a block with nothing but a 'ret'.
The number of tail call to loop conversions remains the same (1618 by my count). The new algorithm does a local scan over the use-def chains to identify local "alloca-derived" values, as well as points where the alloca could escape. Then, a visit over the CFG marks blocks as being before or after the allocas have escaped, and annotates the calls accordingly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208017 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
59c397de1a
commit
05da4dd998
@ -166,6 +166,11 @@ public:
|
||||
return isCall() && cast<CallInst>(getInstruction())->isMustTailCall();
|
||||
}
|
||||
|
||||
/// \brief Tests if this call site is marked as a tail call.
|
||||
bool isTailCall() const {
|
||||
return isCall() && cast<CallInst>(getInstruction())->isTailCall();
|
||||
}
|
||||
|
||||
#define CALLSITE_DELEGATE_GETTER(METHOD) \
|
||||
InstrTy *II = getInstruction(); \
|
||||
return isCall() \
|
||||
|
@ -55,6 +55,7 @@
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/CaptureTracking.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/Analysis/InlineCost.h"
|
||||
#include "llvm/Analysis/InstructionSimplify.h"
|
||||
#include "llvm/Analysis/Loads.h"
|
||||
@ -95,6 +96,9 @@ namespace {
|
||||
bool runOnFunction(Function &F) override;
|
||||
|
||||
private:
|
||||
bool runTRE(Function &F);
|
||||
bool markTails(Function &F, bool &AllCallsAreTailCalls);
|
||||
|
||||
CallInst *FindTRECandidate(Instruction *I,
|
||||
bool CannotTailCallElimCallsMarkedTail);
|
||||
bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
|
||||
@ -146,35 +150,232 @@ static bool CanTRE(AllocaInst *AI) {
|
||||
isa<ConstantInt>(AI->getArraySize());
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct AllocaCaptureTracker : public CaptureTracker {
|
||||
AllocaCaptureTracker() : Captured(false) {}
|
||||
|
||||
void tooManyUses() override { Captured = true; }
|
||||
|
||||
bool shouldExplore(const Use *U) override {
|
||||
Value *V = U->getUser();
|
||||
if (isa<CallInst>(V) || isa<InvokeInst>(V))
|
||||
UsesAlloca.insert(V);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool captured(const Use *U) override {
|
||||
if (isa<ReturnInst>(U->getUser()))
|
||||
return false;
|
||||
Captured = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Captured;
|
||||
SmallPtrSet<const Value *, 16> UsesAlloca;
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
bool TailCallElim::runOnFunction(Function &F) {
  // Respect the 'optnone' attribute: leave the function untouched.
  if (skipOptnoneFunction(F))
    return false;

  // First mark every call that may legally carry the 'tail' attribute; only
  // attempt tail-recursion elimination when nothing was left unmarked.
  bool EveryCallIsTail = false;
  bool Changed = markTails(F, EveryCallIsTail);
  return EveryCallIsTail ? (runTRE(F) | Changed) : Changed;
}
|
||||
|
||||
namespace {
|
||||
struct AllocaDerivedValueTracker {
|
||||
// Start at a root value and walk its use-def chain to mark calls that use the
|
||||
// value or a derived value in AllocaUsers, and places where it may escape in
|
||||
// EscapePoints.
|
||||
void walk(Value *Root) {
|
||||
SmallVector<Use *, 32> Worklist;
|
||||
SmallPtrSet<Use *, 32> Visited;
|
||||
|
||||
auto AddUsesToWorklist = [&](Value *V) {
|
||||
for (auto &U : V->uses()) {
|
||||
if (!Visited.insert(&U))
|
||||
continue;
|
||||
Worklist.push_back(&U);
|
||||
}
|
||||
};
|
||||
|
||||
AddUsesToWorklist(Root);
|
||||
|
||||
while (!Worklist.empty()) {
|
||||
Use *U = Worklist.pop_back_val();
|
||||
Instruction *I = cast<Instruction>(U->getUser());
|
||||
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::Call:
|
||||
case Instruction::Invoke: {
|
||||
CallSite CS(I);
|
||||
bool IsNocapture = !CS.isCallee(U) &&
|
||||
CS.doesNotCapture(CS.getArgumentNo(U));
|
||||
callUsesLocalStack(CS, IsNocapture);
|
||||
if (IsNocapture) {
|
||||
// If the alloca-derived argument is passed in as nocapture, then it
|
||||
// can't propagate to the call's return. That would be capturing.
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Instruction::Load: {
|
||||
// The result of a load is not alloca-derived (unless an alloca has
|
||||
// otherwise escaped, but this is a local analysis).
|
||||
continue;
|
||||
}
|
||||
case Instruction::Store: {
|
||||
if (U->getOperandNo() == 0)
|
||||
EscapePoints.insert(I);
|
||||
continue; // Stores have no users to analyze.
|
||||
}
|
||||
case Instruction::BitCast:
|
||||
case Instruction::GetElementPtr:
|
||||
case Instruction::PHI:
|
||||
case Instruction::Select:
|
||||
case Instruction::AddrSpaceCast:
|
||||
break;
|
||||
default:
|
||||
EscapePoints.insert(I);
|
||||
break;
|
||||
}
|
||||
|
||||
AddUsesToWorklist(I);
|
||||
}
|
||||
}
|
||||
|
||||
void callUsesLocalStack(CallSite CS, bool IsNocapture) {
|
||||
// Add it to the list of alloca users. If it's already there, skip further
|
||||
// processing.
|
||||
if (!AllocaUsers.insert(CS.getInstruction()))
|
||||
return;
|
||||
|
||||
// If it's nocapture then it can't capture the alloca.
|
||||
if (IsNocapture)
|
||||
return;
|
||||
|
||||
// If it can write to memory, it can leak the alloca value.
|
||||
if (!CS.onlyReadsMemory())
|
||||
EscapePoints.insert(CS.getInstruction());
|
||||
}
|
||||
|
||||
SmallPtrSet<Instruction *, 32> AllocaUsers;
|
||||
SmallPtrSet<Instruction *, 32> EscapePoints;
|
||||
};
|
||||
}
|
||||
|
||||
bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
  // A function calling setjmp-like routines can never have calls marked tail.
  if (F.callsFunctionThatReturnsTwice())
    return false;
  AllCallsAreTailCalls = true;

  // The local stack holds all alloca instructions and all byval arguments.
  AllocaDerivedValueTracker Tracker;
  for (Argument &Arg : F.args())
    if (Arg.hasByValAttr())
      Tracker.walk(&Arg);
  for (auto &Block : F)
    for (auto &Inst : Block)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst))
        Tracker.walk(AI);

  bool Modified = false;

  // Track whether a block is reachable after an alloca has escaped. A block
  // containing the escaping instruction itself is recorded with the state it
  // *began* in (no escape yet).
  enum VisitType { UNVISITED, UNESCAPED, ESCAPED };
  DenseMap<BasicBlock *, VisitType> Visited;

  // Escapedness is propagated from a block to its successors. Blocks queued
  // with the ESCAPED state are drained first so each block is finalized at
  // its strongest known state.
  SmallVector<BasicBlock *, 32> UnescapedWork, EscapedWork;

  // A loop edge may bring the walk back to a block after an escape point has
  // been seen, so no call is marked 'tail' until the whole CFG walk is done.
  // Every entry here does not statically use an alloca via use-def chains,
  // but might still observe one if its block turns out to be reachable after
  // an escape point.
  SmallVector<CallInst *, 32> DeferredTails;

  BasicBlock *CurBB = &F.getEntryBlock();
  VisitType CurState = UNESCAPED;
  do {
    for (auto &Inst : *CurBB) {
      if (Tracker.EscapePoints.count(&Inst))
        CurState = ESCAPED;

      CallInst *Call = dyn_cast<CallInst>(&Inst);
      if (!Call || Call->isTailCall())
        continue;

      if (Call->doesNotAccessMemory()) {
        // A call to a readnone function whose arguments are all things
        // computed outside this function can be marked tail. Even if you
        // stored the alloca address into a global, a readnone function can't
        // load the global anyhow.
        //
        // Note that this runs whether we know an alloca has escaped or not.
        // If it has, then we can't trust Tracker.AllocaUsers to be accurate.
        bool SafeToTail = true;
        for (auto &Arg : Call->arg_operands()) {
          if (isa<Constant>(Arg.getUser()))
            continue;
          if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
            if (!A->hasByValAttr())
              continue;
          SafeToTail = false;
          break;
        }
        if (SafeToTail) {
          F.getContext().emitOptimizationRemark(
              "tailcallelim", F, Call->getDebugLoc(),
              "found readnone tail call candidate");
          Call->setTailCall();
          Modified = true;
          continue;
        }
      }

      if (CurState == UNESCAPED && !Tracker.AllocaUsers.count(Call))
        DeferredTails.push_back(Call);
      else
        AllCallsAreTailCalls = false;
    }

    // Push the state at the end of this block into each successor, queueing
    // any successor whose recorded state just became stronger.
    for (auto *Succ : make_range(succ_begin(CurBB), succ_end(CurBB))) {
      auto &SuccState = Visited[Succ];
      if (SuccState < CurState) {
        SuccState = CurState;
        if (SuccState == ESCAPED)
          EscapedWork.push_back(Succ);
        else
          UnescapedWork.push_back(Succ);
      }
    }

    // Prefer escaped work; an unescaped entry is stale if its block has since
    // been promoted to ESCAPED.
    if (!EscapedWork.empty()) {
      CurBB = EscapedWork.pop_back_val();
      CurState = ESCAPED;
    } else {
      CurBB = nullptr;
      while (!UnescapedWork.empty()) {
        auto *Candidate = UnescapedWork.pop_back_val();
        if (Visited[Candidate] == UNESCAPED) {
          CurBB = Candidate;
          CurState = UNESCAPED;
          break;
        }
      }
    }
  } while (CurBB);

  for (CallInst *Call : DeferredTails) {
    if (Visited[Call->getParent()] != ESCAPED) {
      // If the escape point was part way through the block, calls after the
      // escape point wouldn't have been put into DeferredTails.
      F.getContext().emitOptimizationRemark(
          "tailcallelim", F, Call->getDebugLoc(), "found tail call candidate");
      Call->setTailCall();
      Modified = true;
    } else {
      AllCallsAreTailCalls = false;
    }
  }

  return Modified;
}
|
||||
|
||||
bool TailCallElim::runTRE(Function &F) {
|
||||
// If this function is a varargs function, we won't be able to PHI the args
|
||||
// right, so don't even try to convert it...
|
||||
if (F.getFunctionType()->isVarArg()) return false;
|
||||
@ -191,46 +392,30 @@ bool TailCallElim::runOnFunction(Function &F) {
|
||||
// doesn't).
|
||||
bool CanTRETailMarkedCall = true;
|
||||
|
||||
// Find calls that can be marked tail.
|
||||
AllocaCaptureTracker ACT;
|
||||
// Find dynamic allocas.
|
||||
for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) {
|
||||
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
|
||||
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
|
||||
CanTRETailMarkedCall &= CanTRE(AI);
|
||||
PointerMayBeCaptured(AI, &ACT);
|
||||
// If any allocas are captured, exit.
|
||||
if (ACT.Captured)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If any byval or inalloca args are captured, exit. They are also allocated
|
||||
// in our stack frame.
|
||||
for (Argument &Arg : F.args()) {
|
||||
if (Arg.hasByValOrInAllocaAttr())
|
||||
PointerMayBeCaptured(&Arg, &ACT);
|
||||
if (ACT.Captured)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Second pass, change any tail recursive calls to loops.
|
||||
// Change any tail recursive calls to loops.
|
||||
//
|
||||
// FIXME: The code generator produces really bad code when an 'escaping
|
||||
// alloca' is changed from being a static alloca to being a dynamic alloca.
|
||||
// Until this is resolved, disable this transformation if that would ever
|
||||
// happen. This bug is PR962.
|
||||
if (ACT.UsesAlloca.empty()) {
|
||||
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
|
||||
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
|
||||
bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
|
||||
ArgumentPHIs, !CanTRETailMarkedCall);
|
||||
if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
|
||||
Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
|
||||
TailCallsAreMarkedTail, ArgumentPHIs,
|
||||
!CanTRETailMarkedCall);
|
||||
MadeChange |= Change;
|
||||
}
|
||||
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
|
||||
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
|
||||
bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
|
||||
ArgumentPHIs, !CanTRETailMarkedCall);
|
||||
if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
|
||||
Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
|
||||
TailCallsAreMarkedTail, ArgumentPHIs,
|
||||
!CanTRETailMarkedCall);
|
||||
MadeChange |= Change;
|
||||
}
|
||||
}
|
||||
|
||||
@ -239,34 +424,13 @@ bool TailCallElim::runOnFunction(Function &F) {
|
||||
// with themselves. Check to see if we did and clean up our mess if so. This
|
||||
// occurs when a function passes an argument straight through to its tail
|
||||
// call.
|
||||
if (!ArgumentPHIs.empty()) {
|
||||
for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
|
||||
PHINode *PN = ArgumentPHIs[i];
|
||||
for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
|
||||
PHINode *PN = ArgumentPHIs[i];
|
||||
|
||||
// If the PHI Node is a dynamic constant, replace it with the value it is.
|
||||
if (Value *PNV = SimplifyInstruction(PN)) {
|
||||
PN->replaceAllUsesWith(PNV);
|
||||
PN->eraseFromParent();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// At this point, we know that the function does not have any captured
|
||||
// allocas. If additionally the function does not call setjmp, mark all calls
|
||||
// in the function that do not access stack memory with the tail keyword. This
|
||||
// implies ensuring that there does not exist any path from a call that takes
|
||||
// in an alloca but does not capture it and the call which we wish to mark
|
||||
// with "tail".
|
||||
if (!F.callsFunctionThatReturnsTwice()) {
|
||||
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
|
||||
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
|
||||
if (CallInst *CI = dyn_cast<CallInst>(I)) {
|
||||
if (!ACT.UsesAlloca.count(CI)) {
|
||||
CI->setTailCall();
|
||||
MadeChange = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
// If the PHI Node is a dynamic constant, replace it with the value it is.
|
||||
if (Value *PNV = SimplifyInstruction(PN)) {
|
||||
PN->replaceAllUsesWith(PNV);
|
||||
PN->eraseFromParent();
|
||||
}
|
||||
}
|
||||
|
||||
@ -520,6 +684,10 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
|
||||
BasicBlock *BB = Ret->getParent();
|
||||
Function *F = BB->getParent();
|
||||
|
||||
F->getContext().emitOptimizationRemark(
|
||||
"tailcallelim", *F, CI->getDebugLoc(),
|
||||
"transforming tail recursion to loop");
|
||||
|
||||
// OK! We can transform this tail call. If this is the first one found,
|
||||
// create the new entry block, allowing us to branch back to the old entry.
|
||||
if (!OldEntry) {
|
||||
|
@ -151,3 +151,26 @@ define void @test9(i32* byval %a) {
|
||||
call void @use(i32* %a)
|
||||
ret void
|
||||
}
|
||||
|
||||
%struct.X = type { i8* }

declare void @ctor(%struct.X*)

; The call fed only by the incoming sret pointer may be marked 'tail', while
; the call handed the address of the local alloca %x must stay unmarked.
define void @test10(%struct.X* noalias sret %agg.result, i1 zeroext %b) {
; CHECK-LABEL @test10
entry:
  %x = alloca %struct.X, align 8
  br i1 %b, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  call void @ctor(%struct.X* %agg.result)
; CHECK: tail call void @ctor
  br label %return

if.end:
  call void @ctor(%struct.X* %x)
; CHECK: call void @ctor
  br label %return

return:
  ret void
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user