[SystemZ] Reuse CC results for integer comparisons with zero

This also fixes a bug in the predication of LR to LOCR: I'd forgotten
that with these in-place instruction builds, the implicit operands need
to be added manually.  I think this was latent until now, but is tested
by int-cmp-45.c.  It also adds a CC valid mask to STOC, again tested by
int-cmp-45.c.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187573 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Richard Sandiford 2013-08-01 10:39:40 +00:00
parent 3237f88882
commit 8f0ad5ae8f
8 changed files with 969 additions and 59 deletions

View File

@ -1813,7 +1813,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
if (Invert)
CCMask ^= CCValid;
BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
.addReg(SrcReg).addOperand(Base).addImm(Disp).addImm(CCMask);
.addReg(SrcReg).addOperand(Base).addImm(Disp)
.addImm(CCValid).addImm(CCMask);
MI->eraseFromParent();
return MBB;
}

View File

@ -61,12 +61,41 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
// The access size of all memory operands in bytes, or 0 if not known.
bits<5> AccessBytes = 0;
let TSFlags{0} = SimpleBDXLoad;
let TSFlags{1} = SimpleBDXStore;
let TSFlags{2} = Has20BitOffset;
let TSFlags{3} = HasIndex;
let TSFlags{4} = Is128Bit;
let TSFlags{9-5} = AccessBytes;
// If the instruction sets CC to a useful value, this gives the mask
// of all possible CC results. The mask has the same form as
// SystemZ::CCMASK_*.
bits<4> CCValues = 0;
// True if the instruction sets CC to 0 when the result is 0.
bit CCHasZero = 0;
// True if the instruction sets CC to 1 when the result is less than 0
// and to 2 when the result is greater than 0.
bit CCHasOrder = 0;
// True if the instruction is conditional and if the CC mask operand
// comes first (as for BRC, etc.).
bit CCMaskFirst = 0;
// Similar, but true if the CC mask operand comes last (as for LOC, etc.).
bit CCMaskLast = 0;
// True if the instruction is the "logical" rather than "arithmetic" form,
// in cases where a distinction exists.
bit IsLogical = 0;
let TSFlags{0} = SimpleBDXLoad;
let TSFlags{1} = SimpleBDXStore;
let TSFlags{2} = Has20BitOffset;
let TSFlags{3} = HasIndex;
let TSFlags{4} = Is128Bit;
let TSFlags{9-5} = AccessBytes;
let TSFlags{13-10} = CCValues;
let TSFlags{14} = CCHasZero;
let TSFlags{15} = CCHasOrder;
let TSFlags{16} = CCMaskFirst;
let TSFlags{17} = CCMaskLast;
let TSFlags{18} = IsLogical;
}
//===----------------------------------------------------------------------===//
@ -623,11 +652,12 @@ multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
class CondStoreRSY<string mnemonic, bits<16> opcode,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
: InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$R3),
: InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3),
mnemonic#"$R3\t$R1, $BD2", []>,
Requires<[FeatureLoadStoreOnCond]> {
let mayStore = 1;
let AccessBytes = bytes;
let CCMaskLast = 1;
}
// Like CondStoreRSY, but used for the raw assembly form. The condition-code
@ -686,7 +716,9 @@ class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
: InstRRF<opcode, (outs cls1:$R1), (ins cls2:$R2, cond4:$valid, cond4:$R3),
mnemonic#"r$R3\t$R1, $R2", []>,
Requires<[FeatureLoadStoreOnCond]>;
Requires<[FeatureLoadStoreOnCond]> {
let CCMaskLast = 1;
}
// Like CondUnaryRRF, but used for the raw assembly form. The condition-code
// mask is the third operand rather than being part of the mnemonic.
@ -748,6 +780,7 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,
let DisableEncoding = "$R1src";
let mayLoad = 1;
let AccessBytes = bytes;
let CCMaskLast = 1;
}
// Like CondUnaryRSY, but used for the raw assembly form. The condition-code

View File

@ -341,7 +341,8 @@ PredicateInstruction(MachineInstr *MI,
if (unsigned CondOpcode = getConditionalMove(Opcode)) {
MI->setDesc(get(CondOpcode));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
.addImm(CCValid).addImm(CCMask);
.addImm(CCValid).addImm(CCMask)
.addReg(SystemZ::CC, RegState::Implicit);;
return true;
}
}

View File

@ -28,17 +28,27 @@ class SystemZTargetMachine;
namespace SystemZII {
enum {
// See comments in SystemZInstrFormats.td.
SimpleBDXLoad = (1 << 0),
SimpleBDXStore = (1 << 1),
Has20BitOffset = (1 << 2),
HasIndex = (1 << 3),
Is128Bit = (1 << 4),
AccessSizeMask = (31 << 5),
AccessSizeShift = 5
SimpleBDXLoad = (1 << 0),
SimpleBDXStore = (1 << 1),
Has20BitOffset = (1 << 2),
HasIndex = (1 << 3),
Is128Bit = (1 << 4),
AccessSizeMask = (31 << 5),
AccessSizeShift = 5,
CCValuesMask = (15 << 10),
CCValuesShift = 10,
CCHasZero = (1 << 14),
CCHasOrder = (1 << 15),
CCMaskFirst = (1 << 16),
CCMaskLast = (1 << 17),
IsLogical = (1 << 18)
};
// Extract the AccessBytes field (the access size of the instruction's memory
// operands in bytes, or 0 if not known) from the TSFlags value Flags.
static inline unsigned getAccessSize(unsigned int Flags) {
  unsigned Field = Flags & AccessSizeMask;
  return Field >> AccessSizeShift;
}
// Extract the CCValues field from the TSFlags value Flags: the 4-bit mask of
// CC results that the instruction can produce, in the same form as
// SystemZ::CCMASK_*.  The field is 0 if the instruction does not set CC to
// a useful value.
static inline unsigned getCCValues(unsigned int Flags) {
  unsigned Field = Flags & CCValuesMask;
  return Field >> CCValuesShift;
}
// SystemZ MachineOperand target flags.
enum {

View File

@ -59,7 +59,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
// the first operand. It seems friendlier to use mnemonic forms like
// JE and JLH when writing out the assembly though.
let isBranch = 1, isTerminator = 1, Uses = [CC] in {
let isCodeGenOnly = 1 in {
let isCodeGenOnly = 1, CCMaskFirst = 1 in {
def BRC : InstRI<0xA74, (outs), (ins cond4:$valid, cond4:$R1,
brtarget16:$I2), "j$R1\t$I2",
[(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>;
@ -195,7 +195,7 @@ defm CondStore64 : CondStores<GR64, nonvolatile_store,
// The definitions here are for the call-clobbered registers.
let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D],
F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC],
R1 = 14, isCodeGenOnly = 1 in {
def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$I2, variable_ops),
"bras\t%r14, $I2", []>;
@ -512,9 +512,12 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
//===----------------------------------------------------------------------===//
let Defs = [CC] in {
def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>;
def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>;
def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>;
let CCValues = 0xF, CCHasZero = 1 in {
def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>;
def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>;
}
let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in
def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>;
}
defm : SXU<ineg, LCGFR>;
@ -566,7 +569,7 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm),
//===----------------------------------------------------------------------===//
// Plain addition.
let Defs = [CC] in {
let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in {
// Addition of a register.
let isCommutable = 1 in {
defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>;
@ -637,7 +640,7 @@ let Defs = [CC], Uses = [CC] in {
// Plain subtraction. Although immediate forms exist, we use the
// add-immediate instruction instead.
let Defs = [CC] in {
let Defs = [CC], CCValues = 0xF, CCHasZero = 1 in {
// Subtraction of a register.
defm SR : BinaryRRAndK<"s", 0x1B, 0xB9F9, sub, GR32, GR32>;
def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>;
@ -687,13 +690,14 @@ let Defs = [CC], Uses = [CC] in {
let Defs = [CC] in {
// ANDs of a register.
let isCommutable = 1 in {
let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
defm NR : BinaryRRAndK<"n", 0x14, 0xB9F4, and, GR32, GR32>;
defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>;
}
let isConvertibleToThreeAddress = 1 in {
// ANDs of a 16-bit immediate, leaving other bits unaffected.
// The CC result only reflects the 16-bit field, not the full register.
let isCodeGenOnly = 1 in {
def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
@ -704,15 +708,19 @@ let Defs = [CC] in {
def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>;
// ANDs of a 32-bit immediate, leaving other bits unaffected.
let isCodeGenOnly = 1 in
// The CC result only reflects the 32-bit field, which means we can
// use it as a zero indicator for i32 operations but not otherwise.
let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>;
def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>;
}
// ANDs of memory.
defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
let CCValues = 0xC, CCHasZero = 1 in {
defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
}
// AND to memory
defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>;
@ -726,12 +734,13 @@ defm : RMWIByte<and, bdaddr20pair, NIY>;
let Defs = [CC] in {
// ORs of a register.
let isCommutable = 1 in {
let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>;
defm OGR : BinaryRREAndK<"og", 0xB981, 0xB9E6, or, GR64, GR64>;
}
// ORs of a 16-bit immediate, leaving other bits unaffected.
// The CC result only reflects the 16-bit field, not the full register.
let isCodeGenOnly = 1 in {
def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>;
def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>;
@ -742,14 +751,18 @@ let Defs = [CC] in {
def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>;
// ORs of a 32-bit immediate, leaving other bits unaffected.
let isCodeGenOnly = 1 in
// The CC result only reflects the 32-bit field, which means we can
// use it as a zero indicator for i32 operations but not otherwise.
let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>;
def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>;
def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>;
// ORs of memory.
defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
let CCValues = 0xC, CCHasZero = 1 in {
defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
}
// OR to memory
defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>;
@ -763,20 +776,24 @@ defm : RMWIByte<or, bdaddr20pair, OIY>;
let Defs = [CC] in {
// XORs of a register.
let isCommutable = 1 in {
let isCommutable = 1, CCValues = 0xC, CCHasZero = 1 in {
defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>;
defm XGR : BinaryRREAndK<"xg", 0xB982, 0xB9E7, xor, GR64, GR64>;
}
// XORs of a 32-bit immediate, leaving other bits unaffected.
let isCodeGenOnly = 1 in
// The CC result only reflects the 32-bit field, which means we can
// use it as a zero indicator for i32 operations but not otherwise.
let isCodeGenOnly = 1, CCValues = 0xC, CCHasZero = 1 in
def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>;
def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>;
def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>;
// XORs of memory.
defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>;
def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
let CCValues = 0xC, CCHasZero = 1 in {
defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>;
def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
}
// XOR to memory
defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>;
@ -849,7 +866,7 @@ let neverHasSideEffects = 1 in {
}
// Arithmetic shift right.
let Defs = [CC] in {
let Defs = [CC], CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in {
defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>;
}
@ -862,11 +879,12 @@ let neverHasSideEffects = 1 in {
// Rotate second operand left and insert selected bits into first operand.
// These can act like 32-bit operands provided that the constant start and
// end bits (operands 2 and 3) are in the range [32, 64)
// end bits (operands 2 and 3) are in the range [32, 64).
let Defs = [CC] in {
let isCodeGenOnly = 1 in
def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
let CCValues = 0xE, CCHasZero = 1, CCHasOrder = 1 in
def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
}
// Forms of RISBG that only affect one word of the destination register.
@ -880,7 +898,8 @@ def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR64, GR64>,
Requires<[FeatureHighWord]>;
// Rotate second operand left and perform a logical operation with selected
// bits of the first operand.
// bits of the first operand. The CC result only describes the selected bits,
// so isn't useful for a full comparison against zero.
let Defs = [CC] in {
def RNSBG : RotateSelectRIEf<"rnsbg", 0xEC54, GR64, GR64>;
def ROSBG : RotateSelectRIEf<"rosbg", 0xEC56, GR64, GR64>;
@ -892,7 +911,7 @@ let Defs = [CC] in {
//===----------------------------------------------------------------------===//
// Signed comparisons.
let Defs = [CC] in {
let Defs = [CC], CCValues = 0xE in {
// Comparison with a register.
def CR : CompareRR <"c", 0x19, z_cmp, GR32, GR32>;
def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>;
@ -926,7 +945,7 @@ let Defs = [CC] in {
defm : SXB<z_cmp, GR64, CGFR>;
// Unsigned comparisons.
let Defs = [CC] in {
let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
// Comparison with a register.
def CLR : CompareRR <"cl", 0x15, z_ucmp, GR32, GR32>;
def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>;

View File

@ -7,18 +7,36 @@
//
//===----------------------------------------------------------------------===//
//
// This pass does two things:
// (1) fuse compares and branches into COMPARE AND BRANCH instructions
// (2) make sure that all branches are in range.
// This pass does three things:
// (1) try to remove compares if CC already contains the required information
// (2) fuse compares and branches into COMPARE AND BRANCH instructions
// (3) make sure that all branches are in range.
//
// We do (1) here rather than earlier because the fused form prevents
// predication.
// We do (1) here rather than earlier because some transformations can
// change the set of available CC values and we generally want those
// transformations to have priority over (1). This is especially true in
// the commonest case where the CC value is used by a single in-range branch
// instruction, since (2) will then be able to fuse the compare and the
// branch instead.
//
// Doing it so late makes it more likely that a register will be reused
// For example, two-address NILF can sometimes be converted into
// three-address RISBLG. NILF produces a CC value that indicates whether
// the low word is zero, but RISBLG does not modify CC at all. On the
// other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG.
// The CC value produced by NILL isn't useful for our purposes, but the
// value produced by RISBG can be used for any comparison with zero
// (not just equality). So there are some transformations that lose
// CC values (while still being worthwhile) and others that happen to make
// the CC result more useful than it was originally.
//
// We do (2) here rather than earlier because the fused form prevents
// predication. It also has to happen after (1).
//
// Doing (2) so late makes it more likely that a register will be reused
// between the compare and the branch, but it isn't clear whether preventing
// that would be a win or not.
//
// There are several ways in which (2) could be done. One aggressive
// There are several ways in which (3) could be done. One aggressive
// approach is to assume that all branches are in range and successively
// replace those that turn out not to be in range with a longer form
// (branch relaxation). A simple implementation is to continually walk
@ -156,6 +174,7 @@ namespace {
void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator,
bool AssumeRelaxed);
TerminatorInfo describeTerminator(MachineInstr *MI);
bool optimizeCompareZero(MachineInstr *PrevCCSetter, MachineInstr *Compare);
bool fuseCompareAndBranch(MachineInstr *Compare);
uint64_t initMBBInfo();
bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address);
@ -254,6 +273,15 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
return Terminator;
}
// Report whether CC is live out of MBB, i.e. whether at least one successor
// block of MBB lists CC as a live-in register.
static bool isCCLiveOut(MachineBasicBlock *MBB) {
  MachineBasicBlock::succ_iterator Succ = MBB->succ_begin();
  MachineBasicBlock::succ_iterator SuccEnd = MBB->succ_end();
  while (Succ != SuccEnd) {
    MachineBasicBlock *Next = *Succ;
    if (Next->isLiveIn(SystemZ::CC))
      return true;
    ++Succ;
  }
  return false;
}
// Return true if CC is live after MBBI.
static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI,
const TargetRegisterInfo *TRI) {
@ -269,12 +297,130 @@ static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI,
return false;
}
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
SE = MBB->succ_end(); SI != SE; ++SI)
if ((*SI)->isLiveIn(SystemZ::CC))
return true;
return isCCLiveOut(MBB);
}
return false;
// Return true if all uses of the CC value produced by MBBI could make do
// with the CC values in ReusableCCMask. When returning true, point AlterMasks
// to the "CC valid" and "CC mask" operands for each condition.
//
// Only the instructions that follow MBBI in MBBI's own block are scanned.
// The scan stops successfully at the first instruction that kills or
// redefines CC; if the end of the block is reached instead, the result
// depends on whether CC is live into any successor.
//
// MBBI           - the instruction whose CC result is being examined; the
//                  scan starts at the instruction after it.
// ReusableCCMask - the set of CC values that keep their meaning.
// AlterMasks     - output list.  Operands are appended in pairs: first the
//                  "CC valid" operand, then the "CC mask" operand.  Entries
//                  may already have been appended when the function returns
//                  false, so the list is only meaningful on a true result.
// TRI            - register information passed to the CC read/kill/def
//                  queries.
static bool canRestrictCCMask(MachineBasicBlock::iterator MBBI,
unsigned ReusableCCMask,
SmallVectorImpl<MachineOperand *> &AlterMasks,
const TargetRegisterInfo *TRI) {
MachineBasicBlock *MBB = MBBI->getParent();
MachineBasicBlock::iterator MBBE = MBB->end();
for (++MBBI; MBBI != MBBE; ++MBBI) {
if (MBBI->readsRegister(SystemZ::CC, TRI)) {
// Fail if this isn't a use of CC that we understand.
// FirstOpNum is the index of the "CC valid" operand; the "CC mask"
// operand immediately follows it.  For CCMaskFirst instructions the pair
// is operands 0 and 1; for CCMaskLast instructions it is the last two
// explicit operands.
unsigned MBBIFlags = MBBI->getDesc().TSFlags;
unsigned FirstOpNum;
if (MBBIFlags & SystemZII::CCMaskFirst)
FirstOpNum = 0;
else if (MBBIFlags & SystemZII::CCMaskLast)
FirstOpNum = MBBI->getNumExplicitOperands() - 2;
else
return false;
// Check whether the instruction predicate treats all CC values
// outside of ReusableCCMask in the same way. In that case it
// doesn't matter what those CC values mean.
// OutValid and OutMask are the parts of the valid set and of the mask
// that lie outside ReusableCCMask.  The mask must select either none of
// those values (OutMask == 0) or all of them (OutMask == OutValid).
unsigned CCValid = MBBI->getOperand(FirstOpNum).getImm();
unsigned CCMask = MBBI->getOperand(FirstOpNum + 1).getImm();
unsigned OutValid = ~ReusableCCMask & CCValid;
unsigned OutMask = ~ReusableCCMask & CCMask;
if (OutMask != 0 && OutMask != OutValid)
return false;
// Record the operand pair so that the caller can rewrite it.
AlterMasks.push_back(&MBBI->getOperand(FirstOpNum));
AlterMasks.push_back(&MBBI->getOperand(FirstOpNum + 1));
// Succeed if this was the final use of the CC value.
if (MBBI->killsRegister(SystemZ::CC, TRI))
return true;
}
// Succeed if the instruction redefines CC.
// This check runs after the use check above, so an instruction that both
// reads and defines CC has its read validated first.
if (MBBI->definesRegister(SystemZ::CC, TRI))
return true;
}
// Fail if there are other uses of CC that we didn't see.
// Reaching this point means that no kill or redefinition of CC was found
// before the end of the block.
return !isCCLiveOut(MBB);
}
// Try to make Compare redundant with PrevCCSetter, the previous setter of CC,
// by looking for cases where Compare compares the result of PrevCCSetter
// against zero. Return true on success and if Compare can therefore
// be deleted.
//
// This function does not remove Compare itself; that is left to the caller.
// On success it has rewritten the "CC valid" and "CC mask" operands of the
// users of Compare's CC value, marked PrevCCSetter's CC definition as not
// dead, and cleared kills of CC on the instructions between PrevCCSetter and
// Compare.  On failure no instruction has been modified.
bool SystemZLongBranch::optimizeCompareZero(MachineInstr *PrevCCSetter,
MachineInstr *Compare) {
// The optimization is skipped entirely when not optimizing.
if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
return false;
// Check whether this is a comparison against zero.
// Only two-operand forms whose second operand is the immediate 0 qualify.
if (Compare->getNumExplicitOperands() != 2 ||
!Compare->getOperand(1).isImm() ||
Compare->getOperand(1).getImm() != 0)
return false;
// See which compare-style condition codes are available after PrevCCSetter.
// CCHasZero makes the "equal" value reusable; CCHasOrder additionally makes
// the "less than" and "greater than" values reusable.
unsigned PrevFlags = PrevCCSetter->getDesc().TSFlags;
unsigned ReusableCCMask = 0;
if (PrevFlags & SystemZII::CCHasZero)
ReusableCCMask |= SystemZ::CCMASK_CMP_EQ;
// For unsigned comparisons with zero, only equality makes sense.
unsigned CompareFlags = Compare->getDesc().TSFlags;
if (!(CompareFlags & SystemZII::IsLogical) &&
(PrevFlags & SystemZII::CCHasOrder))
ReusableCCMask |= SystemZ::CCMASK_CMP_LT | SystemZ::CCMASK_CMP_GT;
// Nothing that PrevCCSetter leaves in CC is usable for this comparison.
if (ReusableCCMask == 0)
return false;
// Make sure that PrevCCSetter sets the value being compared.
// Operand 0 of PrevCCSetter must be a register definition of exactly the
// register (and subregister index) that Compare tests.
unsigned SrcReg = Compare->getOperand(0).getReg();
unsigned SrcSubReg = Compare->getOperand(0).getSubReg();
if (!PrevCCSetter->getOperand(0).isReg() ||
!PrevCCSetter->getOperand(0).isDef() ||
PrevCCSetter->getOperand(0).getReg() != SrcReg ||
PrevCCSetter->getOperand(0).getSubReg() != SrcSubReg)
return false;
// Make sure that SrcReg survives until Compare.
// Only the instructions strictly between PrevCCSetter and Compare are
// examined.
MachineBasicBlock::iterator MBBI = PrevCCSetter, MBBE = Compare;
const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
for (++MBBI; MBBI != MBBE; ++MBBI)
if (MBBI->modifiesRegister(SrcReg, TRI))
return false;
// See whether all uses of Compare's CC value could make do with
// the values produced by PrevCCSetter.
SmallVector<MachineOperand *, 4> AlterMasks;
if (!canRestrictCCMask(Compare, ReusableCCMask, AlterMasks, TRI))
return false;
// Alter the CC masks that canRestrictCCMask says need to be altered.
// AlterMasks holds operand pairs: index I is a "CC valid" operand and
// index I + 1 is the matching "CC mask" operand.  The valid set becomes the
// full set of CC values that PrevCCSetter can produce.  A mask that selects
// values outside ReusableCCMask keeps its reusable bits and is extended to
// select every PrevCCSetter value outside ReusableCCMask.
unsigned CCValues = SystemZII::getCCValues(PrevFlags);
assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues");
for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) {
AlterMasks[I]->setImm(CCValues);
unsigned CCMask = AlterMasks[I + 1]->getImm();
if (CCMask & ~ReusableCCMask)
AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) |
(CCValues & ~ReusableCCMask));
}
// CC is now live after PrevCCSetter.
int CCDef = PrevCCSetter->findRegisterDefOperandIdx(SystemZ::CC, false,
true, TRI);
assert(CCDef >= 0 && "Couldn't find CC set");
PrevCCSetter->getOperand(CCDef).setIsDead(false);
// Clear any intervening kills of CC.
// MBBE still refers to Compare, so this covers the same range of
// instructions as the SrcReg check above.
MBBI = PrevCCSetter;
for (++MBBI; MBBI != MBBE; ++MBBI)
MBBI->clearRegisterKills(SystemZ::CC, TRI);
return true;
}
// Try to fuse compare instruction Compare into a later branch. Return
@ -345,6 +491,8 @@ bool SystemZLongBranch::fuseCompareAndBranch(MachineInstr *Compare) {
// that no branches need relaxation. Return the size of the function under
// this assumption.
uint64_t SystemZLongBranch::initMBBInfo() {
const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
MF->RenumberBlocks();
unsigned NumBlocks = MF->size();
@ -365,13 +513,20 @@ uint64_t SystemZLongBranch::initMBBInfo() {
// Calculate the size of the fixed part of the block.
MachineBasicBlock::iterator MI = MBB->begin();
MachineBasicBlock::iterator End = MBB->end();
MachineInstr *PrevCCSetter = 0;
while (MI != End && !MI->isTerminator()) {
MachineInstr *Current = MI;
++MI;
if (Current->isCompare() && fuseCompareAndBranch(Current))
Current->removeFromParent();
else
Block.Size += TII->getInstSizeInBytes(Current);
if (Current->isCompare()) {
if ((PrevCCSetter && optimizeCompareZero(PrevCCSetter, Current)) ||
fuseCompareAndBranch(Current)) {
Current->removeFromParent();
continue;
}
}
if (Current->modifiesRegister(SystemZ::CC, TRI))
PrevCCSetter = Current;
Block.Size += TII->getInstSizeInBytes(Current);
}
skipNonTerminators(Position, Block);

View File

@ -0,0 +1,576 @@
; Test that compares are omitted if CC already has the right value
; (z10 version).
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
declare void @foo()
; Addition provides enough for equality comparisons with zero. First test
; the EQ case.
define i32 @f1(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f1:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: je .L{{.*}}
; CHECK: br %r14
entry:
%res = add i32 %a, 1000000
%cmp = icmp eq i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; ...and again with NE.
define i32 @f2(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f2:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: jne .L{{.*}}
; CHECK: br %r14
entry:
%res = add i32 %a, 1000000
%cmp = icmp ne i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; SLT requires a comparison.
define i32 @f3(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f3:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: cijl %r2, 0, .L{{.*}}
; CHECK: br %r14
entry:
%res = add i32 %a, 1000000
%cmp = icmp slt i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; ...SLE too.
define i32 @f4(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f4:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: cijle %r2, 0, .L{{.*}}
; CHECK: br %r14
entry:
%res = add i32 %a, 1000000
%cmp = icmp sle i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; ...SGT too.
define i32 @f5(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f5:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: cijh %r2, 0, .L{{.*}}
; CHECK: br %r14
entry:
%res = add i32 %a, 1000000
%cmp = icmp sgt i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; ...SGE too.
define i32 @f6(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f6:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: cijhe %r2, 0, .L{{.*}}
; CHECK: br %r14
entry:
%res = add i32 %a, 1000000
%cmp = icmp sge i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; Subtraction also provides enough for equality comparisons with zero.
define i32 @f7(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f7:
; CHECK: s %r2, 0(%r4)
; CHECK-NEXT: jne .L{{.*}}
; CHECK: br %r14
entry:
%cur = load i32 *%dest
%res = sub i32 %a, %cur
%cmp = icmp ne i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; ...but not for ordered comparisons.
define i32 @f8(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f8:
; CHECK: s %r2, 0(%r4)
; CHECK-NEXT: cijl %r2, 0, .L{{.*}}
; CHECK: br %r14
entry:
%cur = load i32 *%dest
%res = sub i32 %a, %cur
%cmp = icmp slt i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; Logic register-register instructions also provide enough for equality
; comparisons with zero.
define i32 @f9(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f9:
; CHECK: nr %r2, %r3
; CHECK-NEXT: jl .L{{.*}}
; CHECK: br %r14
entry:
%res = and i32 %a, %b
%cmp = icmp ne i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; ...but not for ordered comparisons.
define i32 @f10(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f10:
; CHECK: nr %r2, %r3
; CHECK-NEXT: cijl %r2, 0, .L{{.*}}
; CHECK: br %r14
entry:
%res = and i32 %a, %b
%cmp = icmp slt i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; Logic register-immediate instructions also provide enough for equality
; comparisons with zero if the immediate covers the whole register.
define i32 @f11(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f11:
; CHECK: nilf %r2, 100
; CHECK-NEXT: jl .L{{.*}}
; CHECK: br %r14
entry:
%res = and i32 %a, 100
%cmp = icmp ne i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; Partial logic register-immediate instructions do not provide simple
; zero results.
define i32 @f12(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f12:
; CHECK: nill %r2, 65436
; CHECK-NEXT: cijlh %r2, 0, .L{{.*}}
; CHECK: br %r14
entry:
%res = and i32 %a, -100
%cmp = icmp ne i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; SRA provides the same CC result as a comparison with zero.
define i32 @f13(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f13:
; CHECK: sra %r2, 0(%r3)
; CHECK-NEXT: je .L{{.*}}
; CHECK: br %r14
entry:
%res = ashr i32 %a, %b
%cmp = icmp eq i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; ...and again with NE.
define i32 @f14(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f14:
; CHECK: sra %r2, 0(%r3)
; CHECK-NEXT: jlh .L{{.*}}
; CHECK: br %r14
entry:
%res = ashr i32 %a, %b
%cmp = icmp ne i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; ...and SLT.
define i32 @f15(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f15:
; CHECK: sra %r2, 0(%r3)
; CHECK-NEXT: jl .L{{.*}}
; CHECK: br %r14
entry:
%res = ashr i32 %a, %b
%cmp = icmp slt i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; ...and SLE.
define i32 @f16(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f16:
; CHECK: sra %r2, 0(%r3)
; CHECK-NEXT: jle .L{{.*}}
; CHECK: br %r14
entry:
%res = ashr i32 %a, %b
%cmp = icmp sle i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; ...and SGT.
define i32 @f17(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f17:
; CHECK: sra %r2, 0(%r3)
; CHECK-NEXT: jh .L{{.*}}
; CHECK: br %r14
entry:
%res = ashr i32 %a, %b
%cmp = icmp sgt i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; ...and SGE.
define i32 @f18(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f18:
; CHECK: sra %r2, 0(%r3)
; CHECK-NEXT: jhe .L{{.*}}
; CHECK: br %r14
entry:
%res = ashr i32 %a, %b
%cmp = icmp sge i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; RISBG provides the same result as a comparison against zero.
; Test the EQ case.
define i64 @f19(i64 %a, i64 %b, i64 *%dest) {
; CHECK-LABEL: f19:
; CHECK: risbg %r2, %r3, 0, 190, 0
; CHECK-NEXT: je .L{{.*}}
; CHECK: br %r14
entry:
%res = and i64 %b, -2
%cmp = icmp eq i64 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i64 %b, i64 *%dest
br label %exit
exit:
ret i64 %res
}
; ...and the SLT case.
define i64 @f20(i64 %a, i64 %b, i64 *%dest) {
; CHECK-LABEL: f20:
; CHECK: risbg %r2, %r3, 0, 190, 0
; CHECK-NEXT: jl .L{{.*}}
; CHECK: br %r14
entry:
%res = and i64 %b, -2
%cmp = icmp slt i64 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i64 %b, i64 *%dest
br label %exit
exit:
ret i64 %res
}
; Test a case where the register we're testing is set by a non-CC-clobbering
; instruction.
define i32 @f21(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f21:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: #APP
; CHECK-NEXT: blah %r2
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: cije %r2, 0, .L{{.*}}
; CHECK: br %r14
entry:
%add = add i32 %a, 1000000
%res = call i32 asm "blah $0", "=r,0" (i32 %add)
%cmp = icmp eq i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; ...and again with a CC-clobbering instruction.
define i32 @f22(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f22:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: #APP
; CHECK-NEXT: blah %r2
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: cije %r2, 0, .L{{.*}}
; CHECK: br %r14
entry:
%add = add i32 %a, 1000000
%res = call i32 asm "blah $0", "=r,0,~{cc}" (i32 %add)
%cmp = icmp eq i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest
br label %exit
exit:
ret i32 %res
}
; Check that stores do not interfere.
define i32 @f23(i32 %a, i32 %b, i32 *%dest1, i32 *%dest2) {
; CHECK-LABEL: f23:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: st %r2, 0(%r4)
; CHECK-NEXT: jne .L{{.*}}
; CHECK: br %r14
entry:
%res = add i32 %a, 1000000
store i32 %res, i32 *%dest1
%cmp = icmp ne i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %b, i32 *%dest2
br label %exit
exit:
ret i32 %res
}
; Check that calls do interfere.
define void @f24(i32 *%ptr) {
; CHECK-LABEL: f24:
; CHECK: afi [[REG:%r[0-9]+]], 1000000
; CHECK-NEXT: brasl %r14, foo@PLT
; CHECK-NEXT: cijlh [[REG]], 0, .L{{.*}}
; CHECK: br %r14
entry:
%val = load i32 *%ptr
%xor = xor i32 %val, 1
%add = add i32 %xor, 1000000
call void @foo()
%cmp = icmp ne i32 %add, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %add, i32 *%ptr
br label %exit
exit:
ret void
}
; Check that inline asms don't interfere if they don't clobber CC.
define void @f25(i32 %a, i32 *%ptr) {
; CHECK-LABEL: f25:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: #APP
; CHECK-NEXT: blah
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jne .L{{.*}}
; CHECK: br %r14
entry:
%add = add i32 %a, 1000000
call void asm sideeffect "blah", "r"(i32 %add)
%cmp = icmp ne i32 %add, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %add, i32 *%ptr
br label %exit
exit:
ret void
}
; ...but do interfere if they do clobber CC.
define void @f26(i32 %a, i32 *%ptr) {
; CHECK-LABEL: f26:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: #APP
; CHECK-NEXT: blah
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: cijlh %r2, 0, .L{{.*}}
; CHECK: br %r14
entry:
%add = add i32 %a, 1000000
call void asm sideeffect "blah", "r,~{cc}"(i32 %add)
%cmp = icmp ne i32 %add, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %add, i32 *%ptr
br label %exit
exit:
ret void
}
; Test a case where CC is set based on a different register from the
; compare input.
define i32 @f27(i32 %a, i32 %b, i32 *%dest1, i32 *%dest2) {
; CHECK-LABEL: f27:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: sr %r3, %r2
; CHECK-NEXT: st %r3, 0(%r4)
; CHECK-NEXT: cije %r2, 0, .L{{.*}}
; CHECK: br %r14
entry:
%add = add i32 %a, 1000000
%sub = sub i32 %b, %add
store i32 %sub, i32 *%dest1
%cmp = icmp eq i32 %add, 0
br i1 %cmp, label %exit, label %store
store:
store i32 %sub, i32 *%dest2
br label %exit
exit:
ret i32 %add
}
; Make sure that we don't confuse a base register for a destination.
define void @f28(i64 %a, i64 *%dest) {
; CHECK-LABEL: f28:
; CHECK: xi 0(%r2), 15
; CHECK: cgije %r2, 0, .L{{.*}}
; CHECK: br %r14
entry:
%ptr = inttoptr i64 %a to i8 *
%val = load i8 *%ptr
%xor = xor i8 %val, 15
store i8 %xor, i8 *%ptr
%cmp = icmp eq i64 %a, 0
br i1 %cmp, label %exit, label %store
store:
store i64 %a, i64 *%dest
br label %exit
exit:
ret void
}

View File

@ -0,0 +1,115 @@
; Test that compares are omitted if CC already has the right value
; (z196 version).
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
; Addition provides enough for equality comparisons with zero.  First test
; the EQ case with LOC.
; The expected output has LOCE directly after AFI, with no compare between.
define i32 @f1(i32 %a, i32 %b, i32 *%cptr) {
; CHECK-LABEL: f1:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: loce %r3, 0(%r4)
; CHECK: br %r14
%add = add i32 %a, 1000000
%cmp = icmp eq i32 %add, 0
%c = load i32 *%cptr
; Selects the loaded value when %add is zero and keeps %b otherwise.
%arg = select i1 %cmp, i32 %c, i32 %b
; The {r3} constraint places the selected value in %r3, the register named
; in the expected output.
call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
ret i32 %add
}
; ...and again with STOC.
; The EQ case as a conditional store: the expected output has STOCE directly
; after AFI, with no compare between.
define i32 @f2(i32 %a, i32 %b, i32 *%cptr) {
; CHECK-LABEL: f2:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: stoce %r3, 0(%r4)
; CHECK: br %r14
%add = add i32 %a, 1000000
%cmp = icmp eq i32 %add, 0
; Load, select and store all use the same address %cptr, so memory only
; changes when %b is selected, i.e. when %add is zero.
%c = load i32 *%cptr
%newval = select i1 %cmp, i32 %b, i32 %c
store i32 %newval, i32 *%cptr
ret i32 %add
}
; Reverse the select order and test with LOCR.
; Both select inputs are register arguments; the expected output has LOCRNE
; directly after AFI, with no compare between.
define i32 @f3(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: f3:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: locrne %r3, %r4
; CHECK: br %r14
%add = add i32 %a, 1000000
%cmp = icmp eq i32 %add, 0
; Keeps %b when %add is zero and takes %c otherwise.
%arg = select i1 %cmp, i32 %b, i32 %c
; The {r3} constraint places the selected value in %r3, the register named
; in the expected output.
call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
ret i32 %add
}
; ...and again with LOC.
; EQ comparison with the loaded value as the false operand of the select;
; the expected output has LOCNE directly after AFI, with no compare between.
define i32 @f4(i32 %a, i32 %b, i32 *%cptr) {
; CHECK-LABEL: f4:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: locne %r3, 0(%r4)
; CHECK: br %r14
%add = add i32 %a, 1000000
%cmp = icmp eq i32 %add, 0
%c = load i32 *%cptr
; Keeps %b when %add is zero and takes the loaded value otherwise.
%arg = select i1 %cmp, i32 %b, i32 %c
; The {r3} constraint places the selected value in %r3, the register named
; in the expected output.
call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
ret i32 %add
}
; ...and again with STOC.
; EQ comparison where the stored value is %b only when %add is nonzero; the
; expected output has STOCNE directly after AFI, with no compare between.
define i32 @f5(i32 %a, i32 %b, i32 *%cptr) {
; CHECK-LABEL: f5:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: stocne %r3, 0(%r4)
; CHECK: br %r14
%add = add i32 %a, 1000000
%cmp = icmp eq i32 %add, 0
; Load, select and store all use the same address %cptr, so memory only
; changes when %b is selected, i.e. when %add is nonzero.
%c = load i32 *%cptr
%newval = select i1 %cmp, i32 %c, i32 %b
store i32 %newval, i32 *%cptr
ret i32 %add
}
; Change the EQ in f3 to NE.
; Register-to-register select on an NE comparison with zero; the expected
; output has LOCRE directly after AFI, with no compare between.
define i32 @f6(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: f6:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: locre %r3, %r4
; CHECK: br %r14
%add = add i32 %a, 1000000
%cmp = icmp ne i32 %add, 0
; Keeps %b when %add is nonzero and takes %c when it is zero.
%arg = select i1 %cmp, i32 %b, i32 %c
; The {r3} constraint places the selected value in %r3, the register named
; in the expected output.
call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
ret i32 %add
}
; ...and again with LOC.
; NE comparison with the loaded value as the false operand of the select;
; the expected output has LOCE directly after AFI, with no compare between.
define i32 @f7(i32 %a, i32 %b, i32 *%cptr) {
; CHECK-LABEL: f7:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: loce %r3, 0(%r4)
; CHECK: br %r14
%add = add i32 %a, 1000000
%cmp = icmp ne i32 %add, 0
%c = load i32 *%cptr
; Keeps %b when %add is nonzero and takes the loaded value when it is zero.
%arg = select i1 %cmp, i32 %b, i32 %c
; The {r3} constraint places the selected value in %r3, the register named
; in the expected output.
call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
ret i32 %add
}
; ...and again with STOC.
; NE comparison where the stored value is %b only when %add is zero; the
; expected output has STOCE directly after AFI, with no compare between.
define i32 @f8(i32 %a, i32 %b, i32 *%cptr) {
; CHECK-LABEL: f8:
; CHECK: afi %r2, 1000000
; CHECK-NEXT: stoce %r3, 0(%r4)
; CHECK: br %r14
%add = add i32 %a, 1000000
%cmp = icmp ne i32 %add, 0
; Load, select and store all use the same address %cptr, so memory only
; changes when %b is selected, i.e. when %add is zero.
%c = load i32 *%cptr
%newval = select i1 %cmp, i32 %c, i32 %b
store i32 %newval, i32 *%cptr
ret i32 %add
}