[SystemZ] Rework compare and branch support

Before the patch we took advantage of the fact that the compare and
branch are glued together in the selection DAG and fused them together
(where possible) while emitting them.  This seemed to work well in practice.
However, fusing the compare so early makes it harder to remove redundant
compares in cases where CC already has a suitable value.  This patch
therefore uses the peephole analyzeCompare/optimizeCompareInstr pair of
functions instead.

No behavioral change intended, but it paves the way for a later patch.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187116 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Richard Sandiford 2013-07-25 09:34:38 +00:00
parent bf99364f81
commit ea14085be5
6 changed files with 151 additions and 62 deletions

View File

@ -1695,34 +1695,6 @@ static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
return NewMBB;
}
bool SystemZTargetLowering::
convertPrevCompareToBranch(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MBBI,
unsigned CCMask, MachineBasicBlock *Target) const {
MachineBasicBlock::iterator Compare = MBBI;
MachineBasicBlock::iterator Begin = MBB->begin();
do
{
if (Compare == Begin)
return false;
--Compare;
}
while (Compare->isDebugValue());
const SystemZInstrInfo *TII = TM.getInstrInfo();
unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(),
Compare);
if (!FusedOpcode)
return false;
DebugLoc DL = Compare->getDebugLoc();
BuildMI(*MBB, MBBI, DL, TII->get(FusedOpcode))
.addOperand(Compare->getOperand(0)).addOperand(Compare->getOperand(1))
.addImm(CCMask).addMBB(Target);
Compare->removeFromParent();
return true;
}
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr *MI,
@ -1742,14 +1714,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
// StartMBB:
// BRC CCMask, JoinMBB
// # fallthrough to FalseMBB
//
// The original DAG glues comparisons to their uses, both to ensure
// that no CC-clobbering instructions are inserted between them, and
// to ensure that comparison results are not reused. This means that
// this Select is the sole user of any preceding comparison instruction
// and that we can try to use a fused compare and branch instead.
MBB = StartMBB;
if (!convertPrevCompareToBranch(MBB, MI, CCMask, JoinMBB))
BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(CCMask).addMBB(JoinMBB);
MBB->addSuccessor(JoinMBB);
MBB->addSuccessor(FalseMBB);
@ -1814,14 +1779,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
// StartMBB:
// BRC CCMask, JoinMBB
// # fallthrough to FalseMBB
//
// The original DAG glues comparisons to their uses, both to ensure
// that no CC-clobbering instructions are inserted between them, and
// to ensure that comparison results are not reused. This means that
// this CondStore is the sole user of any preceding comparison instruction
// and that we can try to use a fused compare and branch instead.
MBB = StartMBB;
if (!convertPrevCompareToBranch(MBB, MI, CCMask, JoinMBB))
BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(CCMask).addMBB(JoinMBB);
MBB->addSuccessor(JoinMBB);
MBB->addSuccessor(FalseMBB);
@ -2475,16 +2433,6 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
case SystemZ::ATOMIC_CMP_SWAPW:
return emitAtomicCmpSwapW(MI, MBB);
case SystemZ::BRC:
// The original DAG glues comparisons to their uses, both to ensure
// that no CC-clobbering instructions are inserted between them, and
// to ensure that comparison results are not reused. This means that
// a BRC is the sole user of a preceding comparison and that we can
// try to use a fused compare and branch instead.
if (convertPrevCompareToBranch(MBB, MI, MI->getOperand(0).getImm(),
MI->getOperand(1).getMBB()))
MI->eraseFromParent();
return MBB;
case SystemZ::MVCWrapper:
return emitMVCWrapper(MI, MBB);
default:

View File

@ -1036,6 +1036,7 @@ class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
[(operator cls1:$R1, cls2:$R2)]> {
let OpKey = mnemonic ## cls1;
let OpType = "reg";
let isCompare = 1;
}
class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
@ -1045,25 +1046,31 @@ class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
[(operator cls1:$R1, cls2:$R2)]> {
let OpKey = mnemonic ## cls1;
let OpType = "reg";
let isCompare = 1;
}
class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
: InstRI<opcode, (outs), (ins cls:$R1, imm:$I2),
mnemonic#"\t$R1, $I2",
[(operator cls:$R1, imm:$I2)]>;
[(operator cls:$R1, imm:$I2)]> {
let isCompare = 1;
}
class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
: InstRIL<opcode, (outs), (ins cls:$R1, imm:$I2),
mnemonic#"\t$R1, $I2",
[(operator cls:$R1, imm:$I2)]>;
[(operator cls:$R1, imm:$I2)]> {
let isCompare = 1;
}
class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load>
: InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2),
mnemonic#"\t$R1, $I2",
[(operator cls:$R1, (load pcrel32:$I2))]> {
let isCompare = 1;
let mayLoad = 1;
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
@ -1079,6 +1086,7 @@ class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
[(operator cls:$R1, (load mode:$XBD2))]> {
let OpKey = mnemonic ## cls;
let OpType = "mem";
let isCompare = 1;
let mayLoad = 1;
let AccessBytes = bytes;
}
@ -1090,6 +1098,7 @@ class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
[(operator cls:$R1, (load bdxaddr12only:$XBD2))]> {
let OpKey = mnemonic ## cls;
let OpType = "mem";
let isCompare = 1;
let mayLoad = 1;
let AccessBytes = bytes;
}
@ -1102,6 +1111,7 @@ class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
[(operator cls:$R1, (load mode:$XBD2))]> {
let OpKey = mnemonic ## cls;
let OpType = "mem";
let isCompare = 1;
let mayLoad = 1;
let AccessBytes = bytes;
}
@ -1125,6 +1135,7 @@ class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
: InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(operator (load mode:$BD1), imm:$I2)]> {
let isCompare = 1;
let mayLoad = 1;
}
@ -1133,6 +1144,7 @@ class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
: InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(operator (load bdaddr12only:$BD1), imm:$I2)]> {
let isCompare = 1;
let mayLoad = 1;
}
@ -1142,6 +1154,7 @@ class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
: InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(operator (load mode:$BD1), imm:$I2)]> {
let isCompare = 1;
let mayLoad = 1;
}

View File

@ -277,6 +277,109 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
return Count;
}
bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI,
unsigned &SrcReg, unsigned &SrcReg2,
int &Mask, int &Value) const {
assert(MI->isCompare() && "Caller should check that this is a compare");
// Ignore comparisons involving memory for now.
if (MI->getNumExplicitOperands() != 2)
return false;
SrcReg = MI->getOperand(0).getReg();
if (MI->getOperand(1).isReg()) {
SrcReg2 = MI->getOperand(1).getReg();
Value = 0;
Mask = ~0;
return true;
} else if (MI->getOperand(1).isImm()) {
SrcReg2 = 0;
Value = MI->getOperand(1).getImm();
Mask = ~0;
return true;
}
return false;
}
// Return true if CC is live after MBBI. We can't rely on kill information
// because of the way InsertBranch is used.
static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI,
const TargetRegisterInfo *TRI) {
if (MBBI->killsRegister(SystemZ::CC, TRI))
return false;
MachineBasicBlock *MBB = MBBI->getParent();
MachineBasicBlock::iterator MBBE = MBB->end();
for (++MBBI; MBBI != MBBE; ++MBBI)
if (MBBI->readsRegister(SystemZ::CC, TRI))
return true;
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
SE = MBB->succ_end(); SI != SE; ++SI)
if ((*SI)->isLiveIn(SystemZ::CC))
return true;
return false;
}
bool
SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare,
unsigned SrcReg, unsigned SrcReg2,
int Mask, int Value,
const MachineRegisterInfo *MRI) const {
MachineBasicBlock *MBB = Compare->getParent();
const TargetRegisterInfo *TRI = &getRegisterInfo();
// Try to fold a comparison into a following branch, if it is only used once.
if (unsigned FusedOpcode = getCompareAndBranch(Compare->getOpcode(),
Compare)) {
MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->end();
for (++MBBI; MBBI != MBBE; ++MBBI) {
if (MBBI->getOpcode() == SystemZ::BRC && !isCCLiveAfter(MBBI, TRI)) {
// Read the branch mask and target.
MachineOperand CCMask(MBBI->getOperand(0));
MachineOperand Target(MBBI->getOperand(1));
// Clear out all current operands.
int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
assert(CCUse >= 0 && "BRC must use CC");
MBBI->RemoveOperand(CCUse);
MBBI->RemoveOperand(1);
MBBI->RemoveOperand(0);
// Rebuild MBBI as a fused compare and branch.
MBBI->setDesc(get(FusedOpcode));
MachineInstrBuilder(*MBB->getParent(), MBBI)
.addOperand(Compare->getOperand(0))
.addOperand(Compare->getOperand(1))
.addOperand(CCMask)
.addOperand(Target);
// Clear any intervening kills of SrcReg and SrcReg2.
MBBI = Compare;
for (++MBBI; MBBI != MBBE; ++MBBI) {
MBBI->clearRegisterKills(SrcReg, TRI);
if (SrcReg2)
MBBI->clearRegisterKills(SrcReg2, TRI);
}
Compare->removeFromParent();
return true;
}
// Stop if we find another reference to CC before a branch.
if (MBBI->readsRegister(SystemZ::CC, TRI) ||
MBBI->modifiesRegister(SystemZ::CC, TRI))
break;
// Stop if we find another assignment to the registers before the branch.
if (MBBI->modifiesRegister(SrcReg, TRI) ||
(SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
break;
}
}
return false;
}
// If Opcode is a move that has a conditional variant, return that variant,
// otherwise return 0.
static unsigned getConditionalMove(unsigned Opcode) {

View File

@ -104,6 +104,14 @@ public:
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const LLVM_OVERRIDE;
virtual bool analyzeCompare(const MachineInstr *MI,
unsigned &SrcReg, unsigned &SrcReg2,
int &Mask, int &Value) const LLVM_OVERRIDE;
virtual bool optimizeCompareInstr(MachineInstr *CmpInstr,
unsigned SrcReg, unsigned SrcReg2,
int Mask, int Value,
const MachineRegisterInfo *MRI) const
LLVM_OVERRIDE;
virtual bool isPredicable(MachineInstr *MI) const LLVM_OVERRIDE;
virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,

View File

@ -58,18 +58,13 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
// in their raw BRC/BRCL form, with the 4-bit condition-code mask being
// the first operand. It seems friendlier to use mnemonic forms like
// JE and JLH when writing out the assembly though.
//
// Using a custom inserter for BRC gives us a chance to convert the BRC
// and a preceding compare into a single compare-and-branch instruction.
// The inserter makes no change in cases where a separate branch really
// is needed.
multiclass CondBranches<Operand ccmask, string short, string long> {
let isBranch = 1, isTerminator = 1, Uses = [CC] in {
def "" : InstRI<0xA74, (outs), (ins ccmask:$R1, brtarget16:$I2), short, []>;
def L : InstRIL<0xC04, (outs), (ins ccmask:$R1, brtarget32:$I2), long, []>;
}
}
let isCodeGenOnly = 1, usesCustomInserter = 1 in
let isCodeGenOnly = 1 in
defm BRC : CondBranches<cond4, "j$R1\t$I2", "jg$R1\t$I2">;
defm AsmBRC : CondBranches<uimm8zx4, "brc\t$R1, $I2", "brcl\t$R1, $I2">;

View File

@ -2,6 +2,8 @@
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare i32 @foo()
; Check register comparison.
define double @f1(double %a, double %b, i32 %i1, i32 %i2) {
; CHECK-LABEL: f1:
@ -159,3 +161,23 @@ define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
%res = select i1 %cond, double %a, double %b
ret double %res
}
; The first branch here got recreated by InsertBranch while splitting the
; critical edge %entry->%while.body, which lost the kills information for CC.
define void @f12(i32 %a, i32 %b) {
; CHECK-LABEL: f12:
; CHECK: crje %r2,
; CHECK: crjlh %r2,
; CHECK: br %r14
entry:
%cmp11 = icmp eq i32 %a, %b
br i1 %cmp11, label %while.end, label %while.body
while.body:
%c = call i32 @foo()
%cmp12 = icmp eq i32 %c, %b
br i1 %cmp12, label %while.end, label %while.body
while.end:
ret void
}