[MachineLICM] First steps of sinking GEPs near calls.

Specifically, if there are copy-like instructions in the loop preheader,
they are moved into the loop close to their uses. This reduces the live
intervals of the values and can avoid register spills.

This is working towards a fix for http://llvm.org/PR22230.
Review: http://reviews.llvm.org/D7259

Next steps:
- Find a better cost model (which non-copy instructions should be sunk?)
- Make this dependent on register pressure

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232262 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Daniel Jasper 2015-03-14 10:58:38 +00:00
parent e282854e63
commit 439cc2c5de
2 changed files with 123 additions and 0 deletions

View File

@ -54,6 +54,12 @@ HoistCheapInsts("hoist-cheap-insts",
cl::desc("MachineLICM should hoist even cheap instructions"),
cl::init(false), cl::Hidden);
static cl::opt<bool>
SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
cl::desc("MachineLICM should sink instructions into "
"loops to avoid register spills"),
cl::init(false), cl::Hidden);
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
@ -243,6 +249,11 @@ namespace {
void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
/// SinkIntoLoop - Sink instructions into loops if profitable. This
/// especially tries to prevent register spills caused by register pressure
/// if there is little to no overhead moving instructions into loops.
void SinkIntoLoop();
/// getRegisterClassIDAndCost - For a given MI, register, and the operand
/// index, return the ID and cost of its representative register class by
/// reference.
@ -381,6 +392,9 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
FirstInLoop = true;
HoistOutOfLoop(N);
CSEMap.clear();
if (SinkInstsToAvoidSpills)
SinkIntoLoop();
}
}
@ -771,6 +785,53 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
}
}
// SinkIntoLoop - Move loop-invariant, copy-like instructions from the loop
// preheader into the loop body, next to their uses. Shrinking the live
// ranges of these values reduces register pressure inside the loop and can
// avoid spills (working towards PR22230). Enabled only under the
// -sink-insts-to-avoid-spills flag.
void MachineLICM::SinkIntoLoop() {
MachineBasicBlock *Preheader = getCurPreheader();
// No unique preheader: nothing to sink from.
if (!Preheader)
return;
// Phase 1: scan the preheader and collect sink candidates. An instruction
// qualifies only if it is loop invariant (so placing it inside the loop is
// legal on every iteration) and none of its results feed a PHI inside the
// loop, which would pin the value at the loop boundary.
SmallVector<MachineInstr *, 8> Candidates;
for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
I != Preheader->instr_end(); ++I) {
// We need to ensure that we can safely move this instruction into the loop.
// As such, it must not have side-effects, e.g. such as a call has.
if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I))
Candidates.push_back(I);
}
// Phase 2: for each candidate, find a single block inside the loop that
// dominates all of its users, and splice the instruction there.
for (MachineInstr *I : Candidates) {
// Only handle the simple case: operand 0 is the (sole) register def.
const MachineOperand &MO = I->getOperand(0);
if (!MO.isDef() || !MO.isReg() || !MO.getReg())
continue;
// Require a single definition so moving this instruction cannot change
// which def reaches any use.
if (!MRI->hasOneDef(MO.getReg()))
continue;
bool CanSink = true;
// B accumulates the nearest common dominator of all use blocks; this is
// the deepest block where the sunk instruction still reaches every user.
MachineBasicBlock *B = nullptr;
for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
// FIXME: Come up with a proper cost model that estimates whether sinking
// the instruction (and thus possibly executing it on every loop
// iteration) is more expensive than a register.
// For now assumes that copies are cheap and thus almost always worth it.
if (!MI.isCopy()) {
CanSink = false;
break;
}
if (!B) {
B = MI.getParent();
continue;
}
B = DT->findNearestCommonDominator(B, MI.getParent());
if (!B) {
// Uses have no common dominator; there is no single legal sink point.
CanSink = false;
break;
}
}
// Skip if any use blocked sinking, no users were found, or the best
// position is the preheader itself (no improvement).
if (!CanSink || !B || B == Preheader)
continue;
// Move the instruction past B's PHIs; defs may not appear among PHIs.
B->splice(B->getFirstNonPHI(), Preheader, I);
}
}
// isOperandKill - Return true if this use operand ends the live range of its
// register: either it carries an explicit kill flag, or the register has
// exactly one non-debug use in the function (so this use must be the last).
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
  // An explicit kill flag settles it immediately.
  if (MO.isKill())
    return true;
  // Otherwise, a register with a single non-debug use dies at that use.
  return MRI->hasOneNonDBGUse(MO.getReg());
}

View File

@ -0,0 +1,62 @@
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-linux -sink-insts-to-avoid-spills | FileCheck %s -check-prefix=SINK
; Ensure that we sink copy-like instructions into loops to avoid register
; spills.
; CHECK: Spill
; SINK-NOT: Spill
%struct.A = type { i32, i32, i32, i32, i32, i32 }
; A loop that dispatches on each input byte to one of six calls, each taking
; the address of a different %struct.A field. All six GEPs are computed in
; the entry block, so without sinking all six pointers are live across every
; call in the loop, which is what forces the spills checked for above.
define void @_Z1fPhP1A(i8* nocapture readonly %input, %struct.A* %a) {
; Six loop-invariant field addresses, hoisted to the entry block. These are
; the copy-like instructions -sink-insts-to-avoid-spills should move down
; into the switch arms that use them.
%1 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
%2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 1
%3 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 2
%4 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 3
%5 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 4
%6 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 5
br label %.backedge
.backedge:
; Walk the input one byte at a time and dispatch on its value.
%.0 = phi i8* [ %input, %0 ], [ %7, %.backedge.backedge ]
%7 = getelementptr inbounds i8, i8* %.0, i64 1
%8 = load i8, i8* %7, align 1
switch i8 %8, label %.backedge.backedge [
i8 0, label %9
i8 10, label %10
i8 20, label %11
i8 30, label %12
i8 40, label %13
i8 50, label %14
]
; Each arm uses exactly one of the six field pointers, so each pointer has a
; single (copy-like) use the pass can sink next to.
; <label>:9
tail call void @_Z6assignPj(i32* %1)
br label %.backedge.backedge
; <label>:10
tail call void @_Z6assignPj(i32* %2)
br label %.backedge.backedge
.backedge.backedge:
br label %.backedge
; <label>:11
tail call void @_Z6assignPj(i32* %3)
br label %.backedge.backedge
; <label>:12
tail call void @_Z6assignPj(i32* %4)
br label %.backedge.backedge
; <label>:13
tail call void @_Z6assignPj(i32* %5)
br label %.backedge.backedge
; <label>:14
tail call void @_Z6assignPj(i32* %6)
br label %.backedge.backedge
}
declare void @_Z6assignPj(i32*)