mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-14 16:33:28 +00:00
Add a late optimization to BranchFolding that hoists common instruction sequences
at the start of basic blocks to their common predecessor. It's actually quite common (e.g. about 50 times in JM/lencod) and has been shown to be a nice code size benefit. e.g. pushq %rax testl %edi, %edi jne LBB0_2 ## BB#1: xorb %al, %al popq %rdx ret LBB0_2: xorb %al, %al callq _foo popq %rdx ret => pushq %rax xorb %al, %al testl %edi, %edi je LBB0_2 ## BB#1: callq _foo LBB0_2: popq %rdx ret rdar://9145558 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@131172 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b38fe4b52d
commit
7e20a574eb
@ -41,6 +41,7 @@ using namespace llvm;
|
|||||||
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
|
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
|
||||||
STATISTIC(NumBranchOpts, "Number of branches optimized");
|
STATISTIC(NumBranchOpts, "Number of branches optimized");
|
||||||
STATISTIC(NumTailMerge , "Number of block tails merged");
|
STATISTIC(NumTailMerge , "Number of block tails merged");
|
||||||
|
STATISTIC(NumHoist , "Number of times common instructions are hoisted");
|
||||||
|
|
||||||
static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
|
static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
|
||||||
cl::init(cl::BOU_UNSET), cl::Hidden);
|
cl::init(cl::BOU_UNSET), cl::Hidden);
|
||||||
@ -65,7 +66,7 @@ namespace {
|
|||||||
public:
|
public:
|
||||||
static char ID;
|
static char ID;
|
||||||
explicit BranchFolderPass(bool defaultEnableTailMerge)
|
explicit BranchFolderPass(bool defaultEnableTailMerge)
|
||||||
: MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge) {}
|
: MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge, true) {}
|
||||||
|
|
||||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||||
virtual const char *getPassName() const { return "Control Flow Optimizer"; }
|
virtual const char *getPassName() const { return "Control Flow Optimizer"; }
|
||||||
@ -86,12 +87,14 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
|
BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) {
|
||||||
switch (FlagEnableTailMerge) {
|
switch (FlagEnableTailMerge) {
|
||||||
case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
|
case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
|
||||||
case cl::BOU_TRUE: EnableTailMerge = true; break;
|
case cl::BOU_TRUE: EnableTailMerge = true; break;
|
||||||
case cl::BOU_FALSE: EnableTailMerge = false; break;
|
case cl::BOU_FALSE: EnableTailMerge = false; break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EnableHoistCommonCode = CommonHoist;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// RemoveDeadBlock - Remove the specified dead machine basic block from the
|
/// RemoveDeadBlock - Remove the specified dead machine basic block from the
|
||||||
@ -186,9 +189,10 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
|
|||||||
|
|
||||||
bool MadeChangeThisIteration = true;
|
bool MadeChangeThisIteration = true;
|
||||||
while (MadeChangeThisIteration) {
|
while (MadeChangeThisIteration) {
|
||||||
MadeChangeThisIteration = false;
|
MadeChangeThisIteration = TailMergeBlocks(MF);
|
||||||
MadeChangeThisIteration |= TailMergeBlocks(MF);
|
|
||||||
MadeChangeThisIteration |= OptimizeBranches(MF);
|
MadeChangeThisIteration |= OptimizeBranches(MF);
|
||||||
|
if (EnableHoistCommonCode)
|
||||||
|
MadeChangeThisIteration |= HoistCommonCode(MF);
|
||||||
MadeChange |= MadeChangeThisIteration;
|
MadeChange |= MadeChangeThisIteration;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -910,7 +914,8 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
|
|||||||
// Make sure blocks are numbered in order
|
// Make sure blocks are numbered in order
|
||||||
MF.RenumberBlocks();
|
MF.RenumberBlocks();
|
||||||
|
|
||||||
for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
|
for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
|
||||||
|
I != E; ) {
|
||||||
MachineBasicBlock *MBB = I++;
|
MachineBasicBlock *MBB = I++;
|
||||||
MadeChange |= OptimizeBlock(MBB);
|
MadeChange |= OptimizeBlock(MBB);
|
||||||
|
|
||||||
@ -1339,3 +1344,253 @@ ReoptimizeBlock:
|
|||||||
|
|
||||||
return MadeChange;
|
return MadeChange;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Hoist Common Code
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
/// HoistCommonCode - Hoist common instruction sequences at the start of basic
|
||||||
|
/// blocks to their common predecessor.
|
||||||
|
/// NOTE: This optimization does not update live-in information so it must be
|
||||||
|
/// run after all passes that require correct liveness information.
|
||||||
|
bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
|
||||||
|
bool MadeChange = false;
|
||||||
|
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
|
||||||
|
MachineBasicBlock *MBB = I++;
|
||||||
|
MadeChange |= HoistCommonCodeInSuccs(MBB);
|
||||||
|
}
|
||||||
|
|
||||||
|
return MadeChange;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
|
||||||
|
/// its 'true' successor.
|
||||||
|
static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
|
||||||
|
MachineBasicBlock *TrueBB) {
|
||||||
|
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
|
||||||
|
E = BB->succ_end(); SI != E; ++SI) {
|
||||||
|
MachineBasicBlock *SuccBB = *SI;
|
||||||
|
if (SuccBB != TrueBB)
|
||||||
|
return SuccBB;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// findHoistingInsertPosAndDeps - Find the location to move common instructions
|
||||||
|
/// in successors to. The location is ususally just before the terminator,
|
||||||
|
/// however if the terminator is a conditional branch and its previous
|
||||||
|
/// instruction is the flag setting instruction, the previous instruction is
|
||||||
|
/// the preferred location. This function also gathers uses and defs of the
|
||||||
|
/// instructions from the insertion point to the end of the block. The data is
|
||||||
|
/// used by HoistCommonCodeInSuccs to ensure safety.
|
||||||
|
static
|
||||||
|
MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
|
||||||
|
const TargetInstrInfo *TII,
|
||||||
|
const TargetRegisterInfo *TRI,
|
||||||
|
SmallSet<unsigned,4> &Uses,
|
||||||
|
SmallSet<unsigned,4> &Defs) {
|
||||||
|
MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
|
||||||
|
if (!TII->isUnpredicatedTerminator(Loc))
|
||||||
|
return MBB->end();
|
||||||
|
|
||||||
|
for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) {
|
||||||
|
const MachineOperand &MO = Loc->getOperand(i);
|
||||||
|
if (!MO.isReg())
|
||||||
|
continue;
|
||||||
|
unsigned Reg = MO.getReg();
|
||||||
|
if (!Reg)
|
||||||
|
continue;
|
||||||
|
if (MO.isUse()) {
|
||||||
|
Uses.insert(Reg);
|
||||||
|
for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
|
||||||
|
Uses.insert(*AS);
|
||||||
|
} else if (!MO.isDead())
|
||||||
|
// Don't try to hoist code in the rare case the terminator defines a
|
||||||
|
// register that is later used.
|
||||||
|
return MBB->end();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Uses.empty())
|
||||||
|
return Loc;
|
||||||
|
if (Loc == MBB->begin())
|
||||||
|
return MBB->end();
|
||||||
|
|
||||||
|
// The terminator is probably a conditional branch, try not to separate the
|
||||||
|
// branch from condition setting instruction.
|
||||||
|
MachineBasicBlock::iterator PI = Loc;
|
||||||
|
--PI;
|
||||||
|
while (PI != MBB->begin() && Loc->isDebugValue())
|
||||||
|
--PI;
|
||||||
|
|
||||||
|
bool IsDef = false;
|
||||||
|
for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
|
||||||
|
const MachineOperand &MO = PI->getOperand(i);
|
||||||
|
if (!MO.isReg() || MO.isUse())
|
||||||
|
continue;
|
||||||
|
unsigned Reg = MO.getReg();
|
||||||
|
if (!Reg)
|
||||||
|
continue;
|
||||||
|
if (Uses.count(Reg))
|
||||||
|
IsDef = true;
|
||||||
|
}
|
||||||
|
if (!IsDef)
|
||||||
|
// The condition setting instruction is not just before the conditional
|
||||||
|
// branch.
|
||||||
|
return Loc;
|
||||||
|
|
||||||
|
// Be conservative, don't insert instruction above something that may have
|
||||||
|
// side-effects. And since it's potentially bad to separate flag setting
|
||||||
|
// instruction from the conditional branch, just abort the optimization
|
||||||
|
// completely.
|
||||||
|
// Also avoid moving code above predicated instruction since it's hard to
|
||||||
|
// reason about register liveness with predicated instruction.
|
||||||
|
bool DontMoveAcrossStore = true;
|
||||||
|
if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) ||
|
||||||
|
TII->isPredicated(PI))
|
||||||
|
return MBB->end();
|
||||||
|
|
||||||
|
|
||||||
|
// Find out what registers are live. Note this routine is ignoring other live
|
||||||
|
// registers which are only used by instructions in successor blocks.
|
||||||
|
for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) {
|
||||||
|
const MachineOperand &MO = PI->getOperand(i);
|
||||||
|
if (!MO.isReg())
|
||||||
|
continue;
|
||||||
|
unsigned Reg = MO.getReg();
|
||||||
|
if (!Reg)
|
||||||
|
continue;
|
||||||
|
if (MO.isUse()) {
|
||||||
|
Uses.insert(Reg);
|
||||||
|
for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
|
||||||
|
Uses.insert(*AS);
|
||||||
|
} else {
|
||||||
|
if (Uses.count(Reg)) {
|
||||||
|
Uses.erase(Reg);
|
||||||
|
for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
|
||||||
|
Uses.erase(*SR); // Use getSubRegisters to be conservative
|
||||||
|
Defs.insert(Reg);
|
||||||
|
for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
|
||||||
|
Defs.insert(*AS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return PI;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// HoistCommonCodeInSuccs - If the successors of MBB has common instruction
|
||||||
|
/// sequence at the start of the function, move the instructions before MBB
|
||||||
|
/// terminator if it's legal.
|
||||||
|
bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
|
||||||
|
MachineBasicBlock *TBB = 0, *FBB = 0;
|
||||||
|
SmallVector<MachineOperand, 4> Cond;
|
||||||
|
if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (!FBB) FBB = findFalseBlock(MBB, TBB);
|
||||||
|
if (!FBB)
|
||||||
|
// Malformed bcc? True and false blocks are the same?
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Restrict the optimization to cases where MBB is the only predecessor,
|
||||||
|
// it is an obvious win.
|
||||||
|
if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Find a suitable position to hoist the common instructions to. Also figure
|
||||||
|
// out which registers are used or defined by instructions from the insertion
|
||||||
|
// point to the end of the block.
|
||||||
|
SmallSet<unsigned, 4> Uses, Defs;
|
||||||
|
MachineBasicBlock::iterator Loc =
|
||||||
|
findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
|
||||||
|
if (Loc == MBB->end())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
SmallSet<unsigned, 4> LocalDefs;
|
||||||
|
unsigned NumDups = 0;
|
||||||
|
MachineBasicBlock::iterator TIB = TBB->begin();
|
||||||
|
MachineBasicBlock::iterator FIB = FBB->begin();
|
||||||
|
MachineBasicBlock::iterator TIE = TBB->end();
|
||||||
|
MachineBasicBlock::iterator FIE = FBB->end();
|
||||||
|
while (TIB != TIE && FIB != FIE) {
|
||||||
|
// Skip dbg_value instructions. These do not count.
|
||||||
|
if (TIB->isDebugValue()) {
|
||||||
|
while (TIB != TIE && TIB->isDebugValue())
|
||||||
|
++TIB;
|
||||||
|
if (TIB == TIE)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (FIB->isDebugValue()) {
|
||||||
|
while (FIB != FIE && FIB->isDebugValue())
|
||||||
|
++FIB;
|
||||||
|
if (FIB == FIE)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!TIB->isIdenticalTo(FIB))
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (TII->isPredicated(TIB))
|
||||||
|
// Hard to reason about register liveness with predicated instruction.
|
||||||
|
break;
|
||||||
|
|
||||||
|
bool IsSafe = true;
|
||||||
|
for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
|
||||||
|
const MachineOperand &MO = TIB->getOperand(i);
|
||||||
|
if (!MO.isReg())
|
||||||
|
continue;
|
||||||
|
unsigned Reg = MO.getReg();
|
||||||
|
if (!Reg)
|
||||||
|
continue;
|
||||||
|
if (MO.isDef()) {
|
||||||
|
if (Uses.count(Reg)) {
|
||||||
|
// Avoid clobbering a register that's used by the instruction at
|
||||||
|
// the point of insertion.
|
||||||
|
IsSafe = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!MO.isDead() && Defs.count(Reg)) {
|
||||||
|
// Don't hoist the instruction if the def would be clobber by the
|
||||||
|
// instruction at the point insertion. FIXME: This is overly
|
||||||
|
// conservative. It should be possible to hoist the instructions
|
||||||
|
// in BB2 in the following example:
|
||||||
|
// BB1:
|
||||||
|
// r1, eflag = op1 r2, r3
|
||||||
|
// brcc eflag
|
||||||
|
//
|
||||||
|
// BB2:
|
||||||
|
// r1 = op2, ...
|
||||||
|
// = op3, r1<kill>
|
||||||
|
IsSafe = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
LocalDefs.insert(Reg);
|
||||||
|
for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
|
||||||
|
LocalDefs.insert(*SR);
|
||||||
|
} else if (!LocalDefs.count(Reg)) {
|
||||||
|
if (Defs.count(Reg)) {
|
||||||
|
// Use is defined by the instruction at the point of insertion.
|
||||||
|
IsSafe = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!IsSafe)
|
||||||
|
break;
|
||||||
|
|
||||||
|
bool DontMoveAcrossStore = true;
|
||||||
|
if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore))
|
||||||
|
break;
|
||||||
|
|
||||||
|
++NumDups;
|
||||||
|
++TIB;
|
||||||
|
++FIB;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!NumDups)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
MBB->splice(Loc, TBB, TBB->begin(), TIB);
|
||||||
|
FBB->erase(FBB->begin(), FIB);
|
||||||
|
++NumHoist;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
@ -19,11 +19,10 @@ namespace llvm {
|
|||||||
class RegScavenger;
|
class RegScavenger;
|
||||||
class TargetInstrInfo;
|
class TargetInstrInfo;
|
||||||
class TargetRegisterInfo;
|
class TargetRegisterInfo;
|
||||||
template<typename T> class SmallVectorImpl;
|
|
||||||
|
|
||||||
class BranchFolder {
|
class BranchFolder {
|
||||||
public:
|
public:
|
||||||
explicit BranchFolder(bool defaultEnableTailMerge);
|
explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist);
|
||||||
|
|
||||||
bool OptimizeFunction(MachineFunction &MF,
|
bool OptimizeFunction(MachineFunction &MF,
|
||||||
const TargetInstrInfo *tii,
|
const TargetInstrInfo *tii,
|
||||||
@ -85,6 +84,7 @@ namespace llvm {
|
|||||||
std::vector<SameTailElt> SameTails;
|
std::vector<SameTailElt> SameTails;
|
||||||
|
|
||||||
bool EnableTailMerge;
|
bool EnableTailMerge;
|
||||||
|
bool EnableHoistCommonCode;
|
||||||
const TargetInstrInfo *TII;
|
const TargetInstrInfo *TII;
|
||||||
const TargetRegisterInfo *TRI;
|
const TargetRegisterInfo *TRI;
|
||||||
MachineModuleInfo *MMI;
|
MachineModuleInfo *MMI;
|
||||||
@ -110,6 +110,9 @@ namespace llvm {
|
|||||||
bool OptimizeBlock(MachineBasicBlock *MBB);
|
bool OptimizeBlock(MachineBasicBlock *MBB);
|
||||||
void RemoveDeadBlock(MachineBasicBlock *MBB);
|
void RemoveDeadBlock(MachineBasicBlock *MBB);
|
||||||
bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
|
bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
|
||||||
|
|
||||||
|
bool HoistCommonCode(MachineFunction &MF);
|
||||||
|
bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -265,7 +265,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
if (!TII) return false;
|
if (!TII) return false;
|
||||||
|
|
||||||
// Tail merge tend to expose more if-conversion opportunities.
|
// Tail merge tend to expose more if-conversion opportunities.
|
||||||
BranchFolder BF(true);
|
BranchFolder BF(true, false);
|
||||||
bool BFChange = BF.OptimizeFunction(MF, TII,
|
bool BFChange = BF.OptimizeFunction(MF, TII,
|
||||||
MF.getTarget().getRegisterInfo(),
|
MF.getTarget().getRegisterInfo(),
|
||||||
getAnalysisIfAvailable<MachineModuleInfo>());
|
getAnalysisIfAvailable<MachineModuleInfo>());
|
||||||
@ -399,7 +399,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
BBAnalysis.clear();
|
BBAnalysis.clear();
|
||||||
|
|
||||||
if (MadeChange && IfCvtBranchFold) {
|
if (MadeChange && IfCvtBranchFold) {
|
||||||
BranchFolder BF(false);
|
BranchFolder BF(false, false);
|
||||||
BF.OptimizeFunction(MF, TII,
|
BF.OptimizeFunction(MF, TII,
|
||||||
MF.getTarget().getRegisterInfo(),
|
MF.getTarget().getRegisterInfo(),
|
||||||
getAnalysisIfAvailable<MachineModuleInfo>());
|
getAnalysisIfAvailable<MachineModuleInfo>());
|
||||||
|
28
test/CodeGen/X86/hoist-common.ll
Normal file
28
test/CodeGen/X86/hoist-common.ll
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
; RUN: llc < %s -march=x86-64 | FileCheck %s
|
||||||
|
|
||||||
|
; Common "xorb al, al" instruction in the two successor blocks should be
|
||||||
|
; moved to the entry block above the test + je.
|
||||||
|
|
||||||
|
; rdar://9145558
|
||||||
|
|
||||||
|
define zeroext i1 @t(i32 %c) nounwind ssp {
|
||||||
|
entry:
|
||||||
|
; CHECK: t:
|
||||||
|
; CHECK: xorb %al, %al
|
||||||
|
; CHECK: test
|
||||||
|
; CHECK: je
|
||||||
|
%tobool = icmp eq i32 %c, 0
|
||||||
|
br i1 %tobool, label %return, label %if.then
|
||||||
|
|
||||||
|
if.then:
|
||||||
|
; CHECK: callq
|
||||||
|
%call = tail call zeroext i1 (...)* @foo() nounwind
|
||||||
|
br label %return
|
||||||
|
|
||||||
|
return:
|
||||||
|
; CHECK: ret
|
||||||
|
%retval.0 = phi i1 [ %call, %if.then ], [ false, %entry ]
|
||||||
|
ret i1 %retval.0
|
||||||
|
}
|
||||||
|
|
||||||
|
declare zeroext i1 @foo(...)
|
Loading…
x
Reference in New Issue
Block a user