After r147827 and r147902, it's now possible for unallocatable registers to be
live across BBs before register allocation. This miscompiled 197.parser
when a cmp + b pair was optimized to a cbnz instruction even though the CPSR def
is live into a successor.
        cbnz    r6, LBB89_12
...
LBB89_12:
        ble     LBB89_1

The fix consists of two parts. 1) Teach LiveVariables that some unallocatable
registers might be live-out, so that their last use is not marked as a kill if
they are. 2) The ARM constant island pass shouldn't form cbz / cbnz if the
conditional branch does not kill CPSR.

rdar://10676853


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148168 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2012-01-14 01:53:46 +00:00
parent d32d3b758f
commit bfe8afaaec
3 changed files with 130 additions and 6 deletions

View File

@ -261,12 +261,11 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
Processed.insert(*SS); Processed.insert(*SS);
} }
} }
} } else if (LastDef && !PhysRegUse[Reg] &&
else if (LastDef && !PhysRegUse[Reg] && !LastDef->findRegisterDefOperand(Reg))
!LastDef->findRegisterDefOperand(Reg))
// Last def defines the super register, add an implicit def of reg. // Last def defines the super register, add an implicit def of reg.
LastDef->addOperand(MachineOperand::CreateReg(Reg, LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
true/*IsDef*/, true/*IsImp*/)); true/*IsImp*/));
// Remember this use. // Remember this use.
PhysRegUse[Reg] = MI; PhysRegUse[Reg] = MI;
@ -607,10 +606,27 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
} }
} }
// MachineCSE may CSE instructions which write to non-allocatable physical
// registers across MBBs. Remember if any reserved register is liveout.
SmallSet<unsigned, 4> LiveOuts;
for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
SE = MBB->succ_end(); SI != SE; ++SI) {
MachineBasicBlock *SuccMBB = *SI;
if (SuccMBB->isLandingPad())
continue;
for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(),
LE = SuccMBB->livein_end(); LI != LE; ++LI) {
unsigned LReg = *LI;
if (!TRI->isInAllocatableClass(LReg))
// Ignore other live-ins, e.g. those that are live into landing pads.
LiveOuts.insert(LReg);
}
}
// Loop over PhysRegDef / PhysRegUse, killing any registers that are // Loop over PhysRegDef / PhysRegUse, killing any registers that are
// available at the end of the basic block. // available at the end of the basic block.
for (unsigned i = 0; i != NumRegs; ++i) for (unsigned i = 0; i != NumRegs; ++i)
if (PhysRegDef[i] || PhysRegUse[i]) if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
HandlePhysRegDef(i, 0, Defs); HandlePhysRegDef(i, 0, Defs);
std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);

View File

@ -1791,6 +1791,11 @@ bool ARMConstantIslands::OptimizeThumb2Branches() {
if (Opcode != ARM::tBcc) if (Opcode != ARM::tBcc)
continue; continue;
// If the conditional branch doesn't kill CPSR, then CPSR can be liveout
// so this transformation is not safe.
if (!Br.MI->killsRegister(ARM::CPSR))
continue;
NewOpc = 0; NewOpc = 0;
unsigned PredReg = 0; unsigned PredReg = 0;
ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg); ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg);

View File

@ -0,0 +1,103 @@
; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
; rdar://10676853
%struct.Dict_node_struct = type { i8*, %struct.Word_file_struct*, %struct.Exp_struct*, %struct.Dict_node_struct*, %struct.Dict_node_struct* }
%struct.Word_file_struct = type { [60 x i8], i32, %struct.Word_file_struct* }
%struct.Exp_struct = type { i8, i8, i8, i8, %union.anon }
%union.anon = type { %struct.E_list_struct* }
%struct.E_list_struct = type { %struct.E_list_struct*, %struct.Exp_struct* }
@lookup_list = external hidden unnamed_addr global %struct.Dict_node_struct*, align 4
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
; Regression test body for rdar://10676853.
;
; The block %if.end3 compares %storemerge1.i3 against 0 and branches on the
; result; its successor %if.end8 compares the same value against 1 (signed
; less-than). NOTE(review): presumably the backend reuses the flags produced by
; the first compare for the second branch, which is why the first compare and
; branch must not be folded into a flag-less cbnz -- confirm against the
; generated assembly.
;
; The FileCheck directives inside the function require a cmp after the
; %if.end3 label and forbid cbnz after that cmp. No later positive directive
; follows, so the negative directive applies to the remainder of the output.
define hidden fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %dn, i8* nocapture %s) nounwind ssp {
; CHECK: rdictionary_lookup:
entry:
br label %tailrecurse
; Loop header: %dn.tr is the current node, either the incoming %dn or the
; pointer loaded from field 3 in %if.then10.
tailrecurse: ; preds = %if.then10, %entry
%dn.tr = phi %struct.Dict_node_struct* [ %dn, %entry ], [ %9, %if.then10 ]
%cmp = icmp eq %struct.Dict_node_struct* %dn.tr, null
br i1 %cmp, label %if.end11, label %if.end
if.end: ; preds = %tailrecurse
%string = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 0
%0 = load i8** %string, align 4
br label %while.cond.i
; Byte-wise walk over %s and the node's string (field 0), advancing both
; pointers while the bytes are equal and the byte from %s is non-zero.
while.cond.i: ; preds = %while.body.i, %if.end
%1 = phi i8* [ %s, %if.end ], [ %incdec.ptr.i, %while.body.i ]
%storemerge.i = phi i8* [ %0, %if.end ], [ %incdec.ptr6.i, %while.body.i ]
%2 = load i8* %1, align 1
%cmp.i = icmp eq i8 %2, 0
%.pre.i = load i8* %storemerge.i, align 1
br i1 %cmp.i, label %lor.lhs.false.i, label %land.end.i
land.end.i: ; preds = %while.cond.i
%cmp4.i = icmp eq i8 %2, %.pre.i
br i1 %cmp4.i, label %while.body.i, label %while.end.i
while.body.i: ; preds = %land.end.i
%incdec.ptr.i = getelementptr inbounds i8* %1, i32 1
%incdec.ptr6.i = getelementptr inbounds i8* %storemerge.i, i32 1
br label %while.cond.i
; A byte value of 42 ('*') on either side goes straight to %if.end3 with a
; comparison result of 0 (see the phi there).
while.end.i: ; preds = %land.end.i
%cmp8.i = icmp eq i8 %2, 42
br i1 %cmp8.i, label %if.end3, label %lor.lhs.false.i
lor.lhs.false.i: ; preds = %while.end.i, %while.cond.i
%3 = phi i8 [ %2, %while.end.i ], [ 0, %while.cond.i ]
%cmp11.i = icmp eq i8 %.pre.i, 42
br i1 %cmp11.i, label %if.end3, label %dict_match.exit
; Computes the signed difference of the two mismatching bytes, with a byte
; value of 46 ('.') replaced by 0 on each side before subtracting.
dict_match.exit: ; preds = %lor.lhs.false.i
%cmp14.i = icmp eq i8 %3, 46
%conv16.i = sext i8 %3 to i32
%.conv16.i = select i1 %cmp14.i, i32 0, i32 %conv16.i
%cmp18.i = icmp eq i8 %.pre.i, 46
%conv22.i = sext i8 %.pre.i to i32
%cond24.i = select i1 %cmp18.i, i32 0, i32 %conv22.i
%sub.i = sub nsw i32 %.conv16.i, %cond24.i
%cmp1 = icmp sgt i32 %sub.i, -1
br i1 %cmp1, label %if.end3, label %if.then10
if.end3: ; preds = %dict_match.exit, %lor.lhs.false.i, %while.end.i
; Block under test: the compare of %storemerge1.i3 with 0 below must remain a
; real cmp instruction in the output rather than being merged into cbnz.
; CHECK: %if.end3
; CHECK: cmp
; CHECK-NOT: cbnz
%storemerge1.i3 = phi i32 [ %sub.i, %dict_match.exit ], [ 0, %lor.lhs.false.i ], [ 0, %while.end.i ]
%right = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 4
%4 = load %struct.Dict_node_struct** %right, align 4
tail call fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %4, i8* %s)
%cmp4 = icmp eq i32 %storemerge1.i3, 0
br i1 %cmp4, label %if.then5, label %if.end8
; Copies the first 16 bytes of the current node into a fresh 20-byte
; allocation, stores the old @lookup_list head at byte offset 16 of the copy,
; and makes the copy the new @lookup_list head.
if.then5: ; preds = %if.end3
%call6 = tail call fastcc i8* @xalloc(i32 20)
%5 = bitcast i8* %call6 to %struct.Dict_node_struct*
%6 = bitcast %struct.Dict_node_struct* %dn.tr to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %call6, i8* %6, i32 16, i32 4, i1 false)
%7 = load %struct.Dict_node_struct** @lookup_list, align 4
%right7 = getelementptr inbounds i8* %call6, i32 16
%8 = bitcast i8* %right7 to %struct.Dict_node_struct**
store %struct.Dict_node_struct* %7, %struct.Dict_node_struct** %8, align 4
store %struct.Dict_node_struct* %5, %struct.Dict_node_struct** @lookup_list, align 4
br label %if.then10
; Second test of the same %storemerge1.i3 value (signed less-than 1), reached
; only from the not-equal-to-zero edge of %if.end3.
if.end8: ; preds = %if.end3
%cmp9 = icmp slt i32 %storemerge1.i3, 1
br i1 %cmp9, label %if.then10, label %if.end11
; Continue the loop with the pointer stored in field 3 of the current node.
if.then10: ; preds = %if.end8, %if.then5, %dict_match.exit
%left = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 3
%9 = load %struct.Dict_node_struct** %left, align 4
br label %tailrecurse
if.end11: ; preds = %if.end8, %tailrecurse
ret void
}
; Materializable
declare hidden fastcc i8* @xalloc(i32) nounwind ssp