Post-RA LICM should take care not to hoist an instruction that would clobber a register that's read by the preheader terminator.

rdar://11095580


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153492 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2012-03-27 01:50:58 +00:00
parent bca9c25dab
commit d6c2355789
2 changed files with 87 additions and 4 deletions

View File

@ -478,6 +478,10 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
/// invariants out to the preheader.
void MachineLICM::HoistRegionPostRA() {
MachineBasicBlock *Preheader = getCurPreheader();
if (!Preheader)
return;
unsigned NumRegs = TRI->getNumRegs();
BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
@ -514,25 +518,46 @@ void MachineLICM::HoistRegionPostRA() {
}
}
// Gather the registers read / clobbered by the terminator.
BitVector TermRegs(NumRegs);
MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
if (TI != Preheader->end()) {
for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = TI->getOperand(i);
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
TermRegs.set(*AS);
}
}
// Now evaluate whether the potential candidates qualify.
// 1. Check if the candidate defined register is defined by another
// instruction in the loop.
// 2. If the candidate is a load from stack slot (always true for now),
// check if the slot is stored anywhere in the loop.
// 3. Make sure the candidate's def does not clobber registers read by the
// terminator. Similarly, its def must not be clobbered by the
// terminator.
for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
if (Candidates[i].FI != INT_MIN &&
StoredFIs.count(Candidates[i].FI))
continue;
if (!PhysRegClobbers.test(Candidates[i].Def)) {
unsigned Def = Candidates[i].Def;
if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
bool Safe = true;
MachineInstr *MI = Candidates[i].MI;
for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
const MachineOperand &MO = MI->getOperand(j);
if (!MO.isReg() || MO.isDef() || !MO.getReg())
continue;
if (PhysRegDefs.test(MO.getReg()) ||
PhysRegClobbers.test(MO.getReg())) {
unsigned Reg = MO.getReg();
if (PhysRegDefs.test(Reg) ||
PhysRegClobbers.test(Reg)) {
// If it's using a non-loop-invariant register, then it's obviously
// not safe to hoist.
Safe = false;
@ -571,7 +596,6 @@ void MachineLICM::AddToLiveIns(unsigned Reg) {
/// dirty work.
void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
MachineBasicBlock *Preheader = getCurPreheader();
if (!Preheader) return;
// Now move the instruction to the predecessor, inserting it before any
// terminator instructions.

View File

@ -0,0 +1,59 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats |& \
; RUN: not grep {Number of machine instructions hoisted out of loops post regalloc}
; rdar://11095580
%struct.ref_s = type { %union.color_sample, i16, i16 }
%union.color_sample = type { i64 }
@table = external global [3891 x i64]
declare i32 @foo()
; @zarray - Test function for rdar://11095580. Every block except the last ends
; in an indirectbr, and the while.body loop is entered from while.body.lr.ph
; through an indirectbr on an address loaded from @table.
define i32 @zarray(%struct.ref_s* nocapture %op) nounwind ssp {
entry:
; Derive the starting table index from the sign of @foo's result: the ashr by
; 31 yields 0 or -1, so %0 is 0 or 1396 and %index9 is 2397 or 3793.
%call = tail call i32 @foo()
%tmp = ashr i32 %call, 31
%0 = and i32 %tmp, 1396
%index9 = add i32 %0, 2397
indirectbr i8* undef, [label %return, label %if.end]
if.end: ; preds = %entry
; Load the i16 field at index 2 of %op; adjust the index by 1396 when that
; field is zero and by -1910 otherwise.
%size5 = getelementptr inbounds %struct.ref_s* %op, i64 0, i32 2
%tmp6 = load i16* %size5, align 2
%tobool1 = icmp eq i16 %tmp6, 0
%1 = select i1 %tobool1, i32 1396, i32 -1910
%index10 = add i32 %index9, %1
indirectbr i8* undef, [label %return, label %while.body.lr.ph]
while.body.lr.ph: ; preds = %if.end
; Block that enters the loop: load the pointer stored at the start of %op,
; widen the i16 loaded in if.end to i64 (it is the loop's exit bound), and load
; the branch target from @table at index %index13.
%refs = bitcast %struct.ref_s* %op to %struct.ref_s**
%tmp9 = load %struct.ref_s** %refs, align 8
%tmp4 = zext i16 %tmp6 to i64
%index13 = add i32 %index10, 1658
%2 = sext i32 %index13 to i64
%3 = getelementptr [3891 x i64]* @table, i64 0, i64 %2
%blockaddress14 = load i64* %3, align 8
%4 = inttoptr i64 %blockaddress14 to i8*
; NOTE(review): presumably this indirectbr is the preheader terminator whose
; register operand post-RA LICM must not clobber - confirm against the
; generated machine code.
indirectbr i8* %4, [label %while.body]
while.body: ; preds = %while.body, %while.body.lr.ph
; Each iteration stores 32 into the i16 field at index 1 of element %indvar of
; the array pointed to by %tmp9, advances %indvar, and loads the next branch
; target from @table. The index only changes (by 1648) on the iteration where
; %indvar.next equals %tmp4.
%index7 = phi i32 [ %index15, %while.body ], [ %index13, %while.body.lr.ph ]
%indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ]
%type_attrs = getelementptr %struct.ref_s* %tmp9, i64 %indvar, i32 1
store i16 32, i16* %type_attrs, align 2
%indvar.next = add i64 %indvar, 1
%exitcond5 = icmp eq i64 %indvar.next, %tmp4
%tmp7 = select i1 %exitcond5, i32 1648, i32 0
%index15 = add i32 %index7, %tmp7
; %tmp8 has no uses in this function.
%tmp8 = select i1 %exitcond5, i64 13, i64 0
%5 = sext i32 %index15 to i64
%6 = getelementptr [3891 x i64]* @table, i64 0, i64 %5
%blockaddress16 = load i64* %6, align 8
%7 = inttoptr i64 %blockaddress16 to i8*
indirectbr i8* %7, [label %return, label %while.body]
return: ; preds = %while.body, %if.end, %entry
; The result is @foo's return value when arriving from entry, and 0 when
; arriving from if.end or while.body.
%retval.0 = phi i32 [ %call, %entry ], [ 0, %if.end ], [ 0, %while.body ]
ret i32 %retval.0
}