[SystemZ] Move compare-and-branch generation even later

r187116 moved compare-and-branch generation from the instruction-selection
pass to the peephole optimizer (via optimizeCompare).  It turns out that even
this is a bit too early.  Fused compare-and-branch instructions don't
interact well with predication, where a CC result is needed.  They also
make it harder to reuse the CC side-effects of earlier instructions
(not yet implemented, but the subject of a later patch).

Another problem was that the AnalyzeBranch family of routines weren't
handling fused compare-and-branch instructions, so we weren't able to reverse
the fused form in cases where we would reverse a separate branch.  This could have
been fixed by extending AnalyzeBranch, but given the other problems,
I've instead moved the fusing to the long-branch pass, which is also
responsible for the opposite transformation: splitting out-of-range
compares and branches into separate compares and long branches.

I've added a test for the AnalyzeBranch problem.  A test for the
predication problem is included in the next patch, which fixes a bug
in the choice of CC mask.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187494 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Richard Sandiford 2013-07-31 12:11:07 +00:00
parent 093043ce11
commit 0416e3c599
5 changed files with 164 additions and 136 deletions

View File

@ -1997,17 +1997,10 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
if (IsSubWord)
BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
.addReg(OldVal).addReg(BitShift).addImm(0);
unsigned FusedOpcode = TII->getCompareAndBranch(CompareOpcode);
if (FusedOpcode)
BuildMI(MBB, DL, TII->get(FusedOpcode))
.addReg(RotatedOldVal).addReg(Src2)
.addImm(KeepOldMask).addMBB(UpdateMBB);
else {
BuildMI(MBB, DL, TII->get(CompareOpcode))
.addReg(RotatedOldVal).addReg(Src2);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(KeepOldMask).addMBB(UpdateMBB);
}
BuildMI(MBB, DL, TII->get(CompareOpcode))
.addReg(RotatedOldVal).addReg(Src2);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(KeepOldMask).addMBB(UpdateMBB);
MBB->addSuccessor(UpdateMBB);
MBB->addSuccessor(UseAltMBB);
@ -2109,7 +2102,8 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
// ^^ Replace the upper 32-BitSize bits of the
// comparison value with those that we loaded,
// so that we can use a full word comparison.
// CRJNE %Dest, %RetryCmpVal, DoneMBB
// CR %Dest, %RetryCmpVal
// JNE DoneMBB
// # Fall through to SetMBB
MBB = LoopMBB;
BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
@ -2125,8 +2119,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
.addReg(OldVal).addReg(BitShift).addImm(BitSize);
BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
.addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
BuildMI(MBB, DL, TII->get(SystemZ::CRJ))
.addReg(Dest).addReg(RetryCmpVal)
BuildMI(MBB, DL, TII->get(SystemZ::CR))
.addReg(Dest).addReg(RetryCmpVal);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(MaskNE).addMBB(DoneMBB);
MBB->addSuccessor(DoneMBB);
MBB->addSuccessor(SetMBB);

View File

@ -282,109 +282,6 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
return Count;
}
// Decompose the compare instruction MI into its operands.
//
// On success, SrcReg receives the first operand's register and Mask is set
// to ~0.  For a register-register compare, SrcReg2 receives the second
// register and Value is 0; for a register-immediate compare, SrcReg2 is 0
// and Value receives the immediate.
//
// Returns false if MI does not have exactly two explicit operands or if the
// second operand is neither a register nor an immediate.  Note that SrcReg
// has already been written in the latter case.
bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI,
                                      unsigned &SrcReg, unsigned &SrcReg2,
                                      int &Mask, int &Value) const {
  assert(MI->isCompare() && "Caller should check that this is a compare");

  // Comparisons involving memory are ignored for now; only the two-operand
  // forms are analyzed.
  if (MI->getNumExplicitOperands() != 2)
    return false;

  SrcReg = MI->getOperand(0).getReg();

  // Reject anything whose second operand is not a register or an immediate.
  const bool RHSIsReg = MI->getOperand(1).isReg();
  if (!RHSIsReg && !MI->getOperand(1).isImm())
    return false;

  if (RHSIsReg) {
    SrcReg2 = MI->getOperand(1).getReg();
    Value = 0;
  } else {
    SrcReg2 = 0;
    Value = MI->getOperand(1).getImm();
  }
  Mask = ~0;
  return true;
}
// Return true if the value that CC holds at MBBI is still live after MBBI.
//
// MBBI is the instruction to look past and TRI is the target register info
// used for the register queries.
//
// A kill flag on MBBI itself is taken as proof that CC is dead, but we can't
// rely on kill information being present because of the way InsertBranch is
// used, so otherwise we scan the rest of the block and then the live-in
// lists of the successors.
static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI,
                          const TargetRegisterInfo *TRI) {
  if (MBBI->killsRegister(SystemZ::CC, TRI))
    return false;

  MachineBasicBlock *MBB = MBBI->getParent();
  MachineBasicBlock::iterator MBBE = MBB->end();
  for (++MBBI; MBBI != MBBE; ++MBBI) {
    // A read before any redefinition means the current value is needed.
    if (MBBI->readsRegister(SystemZ::CC, TRI))
      return true;
    // A redefinition that is reached without an intervening read ends the
    // lifetime of the current value: any later read, or any successor
    // live-in, would see the new value instead.
    if (MBBI->definesRegister(SystemZ::CC, TRI))
      return false;
  }

  // The value reaches the end of the block, so it is live exactly when
  // some successor lists CC as a live-in.
  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
         SE = MBB->succ_end(); SI != SE; ++SI)
    if ((*SI)->isLiveIn(SystemZ::CC))
      return true;

  return false;
}
// Try to fold the compare instruction Compare into a later BRC in the same
// block, turning that BRC into a fused compare-and-branch.
//
// SrcReg and SrcReg2 are the registers whose redefinition blocks the fold
// and whose intervening kill flags are cleared on success; SrcReg2 is
// treated as absent when it is zero.  Mask, Value and MRI are not used by
// this implementation.
//
// Returns true if the branch was rewritten and Compare was removed from its
// block, false if nothing was changed.
bool
SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare,
unsigned SrcReg, unsigned SrcReg2,
int Mask, int Value,
const MachineRegisterInfo *MRI) const {
MachineBasicBlock *MBB = Compare->getParent();
const TargetRegisterInfo *TRI = &getRegisterInfo();
// Try to fold a comparison into a following branch, if it is only used once.
// A zero result from getCompareAndBranch is treated as "no fused form".
if (unsigned FusedOpcode = getCompareAndBranch(Compare->getOpcode(),
Compare)) {
// Walk forward from the instruction after Compare to the end of the block.
MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->end();
for (++MBBI; MBBI != MBBE; ++MBBI) {
// Only fuse into a BRC whose CC input is not needed by anything later.
if (MBBI->getOpcode() == SystemZ::BRC && !isCCLiveAfter(MBBI, TRI)) {
// Read the branch mask and target.
// These are copies, so they survive the operand removal below.
MachineOperand CCMask(MBBI->getOperand(0));
MachineOperand Target(MBBI->getOperand(1));
// Clear out all current operands.
// The CC use goes first because its index was computed before any
// removal; the explicit operands are then removed from index 1 down to 0.
int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
assert(CCUse >= 0 && "BRC must use CC");
MBBI->RemoveOperand(CCUse);
MBBI->RemoveOperand(1);
MBBI->RemoveOperand(0);
// Rebuild MBBI as a fused compare and branch.
// New operand order: both compare operands, then the mask, then the target.
MBBI->setDesc(get(FusedOpcode));
MachineInstrBuilder(*MBB->getParent(), MBBI)
.addOperand(Compare->getOperand(0))
.addOperand(Compare->getOperand(1))
.addOperand(CCMask)
.addOperand(Target);
// Clear any intervening kills of SrcReg and SrcReg2.
// The scan restarts after Compare and runs to the end of the block, so it
// also covers the rewritten branch and anything that follows it.
MBBI = Compare;
for (++MBBI; MBBI != MBBE; ++MBBI) {
MBBI->clearRegisterKills(SrcReg, TRI);
if (SrcReg2)
MBBI->clearRegisterKills(SrcReg2, TRI);
}
// The branch now performs the comparison itself.
Compare->removeFromParent();
return true;
}
// Stop if we find another reference to CC before a branch.
if (MBBI->readsRegister(SystemZ::CC, TRI) ||
MBBI->modifiesRegister(SystemZ::CC, TRI))
break;
// Stop if we find another assignment to the registers before the branch.
if (MBBI->modifiesRegister(SrcReg, TRI) ||
(SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
break;
}
}
return false;
}
// If Opcode is a move that has a conditional variant, return that variant,
// otherwise return 0.
static unsigned getConditionalMove(unsigned Opcode) {

View File

@ -104,14 +104,6 @@ public:
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const LLVM_OVERRIDE;
virtual bool analyzeCompare(const MachineInstr *MI,
unsigned &SrcReg, unsigned &SrcReg2,
int &Mask, int &Value) const LLVM_OVERRIDE;
virtual bool optimizeCompareInstr(MachineInstr *CmpInstr,
unsigned SrcReg, unsigned SrcReg2,
int Mask, int Value,
const MachineRegisterInfo *MRI) const
LLVM_OVERRIDE;
virtual bool isPredicable(MachineInstr *MI) const LLVM_OVERRIDE;
virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,

View File

@ -7,16 +7,26 @@
//
//===----------------------------------------------------------------------===//
//
// This pass makes sure that all branches are in range. There are several ways
// in which this could be done. One aggressive approach is to assume that all
// branches are in range and successively replace those that turn out not
// to be in range with a longer form (branch relaxation). A simple
// implementation is to continually walk through the function relaxing
// branches until no more changes are needed and a fixed point is reached.
// However, in the pathological worst case, this implementation is
// quadratic in the number of blocks; relaxing branch N can make branch N-1
// go out of range, which in turn can make branch N-2 go out of range,
// and so on.
// This pass does two things:
// (1) fuse compares and branches into COMPARE AND BRANCH instructions
// (2) make sure that all branches are in range.
//
// We do (1) here rather than earlier because the fused form prevents
// predication.
//
// Doing it so late makes it more likely that a register will be reused
// between the compare and the branch, but it isn't clear whether preventing
// that would be a win or not.
//
// There are several ways in which (2) could be done. One aggressive
// approach is to assume that all branches are in range and successively
// replace those that turn out not to be in range with a longer form
// (branch relaxation). A simple implementation is to continually walk
// through the function relaxing branches until no more changes are
// needed and a fixed point is reached. However, in the pathological
// worst case, this implementation is quadratic in the number of blocks;
// relaxing branch N can make branch N-1 go out of range, which in turn
// can make branch N-2 go out of range, and so on.
//
// An alternative approach is to assume that all branches must be
// converted to their long forms, then reinstate the short forms of
@ -146,6 +156,7 @@ namespace {
void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator,
bool AssumeRelaxed);
TerminatorInfo describeTerminator(MachineInstr *MI);
bool fuseCompareAndBranch(MachineInstr *Compare);
uint64_t initMBBInfo();
bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address);
bool mustRelaxABranch();
@ -243,6 +254,90 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
return Terminator;
}
// Return true if the value that CC holds at MBBI is still needed after MBBI,
// either by a later instruction in the same block or by a successor block.
static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI,
                          const TargetRegisterInfo *TRI) {
  // A kill flag on MBBI itself settles the question immediately.
  if (MBBI->killsRegister(SystemZ::CC, TRI))
    return false;

  MachineBasicBlock *Parent = MBBI->getParent();

  // Look at the remaining instructions in the block: the first one that
  // touches CC decides the answer (a read keeps the value live, a
  // definition without a read replaces it).
  MachineBasicBlock::iterator Next = MBBI;
  MachineBasicBlock::iterator BlockEnd = Parent->end();
  ++Next;
  while (Next != BlockEnd) {
    if (Next->readsRegister(SystemZ::CC, TRI))
      return true;
    if (Next->definesRegister(SystemZ::CC, TRI))
      return false;
    ++Next;
  }

  // Nothing in the block touched CC, so the value is live exactly when
  // some successor lists CC as a live-in.
  MachineBasicBlock::succ_iterator Succ = Parent->succ_begin();
  MachineBasicBlock::succ_iterator SuccEnd = Parent->succ_end();
  while (Succ != SuccEnd) {
    if ((*Succ)->isLiveIn(SystemZ::CC))
      return true;
    ++Succ;
  }
  return false;
}
// Try to fuse compare instruction Compare into a later BRC in the same
// block, rewriting that BRC in place as a compare-and-branch.
//
// Returns true on success.  Compare is then redundant, but it is NOT removed
// here; removing it is left to the caller.  Returns false, leaving the block
// unchanged, when optimization is disabled, when Compare has no fused form,
// or when no suitable branch is found.
bool SystemZLongBranch::fuseCompareAndBranch(MachineInstr *Compare) {
// Never fuse when compiling without optimization.
if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
return false;
// A zero opcode means there is no fused form for this compare.
unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(),
Compare);
if (!FusedOpcode)
return false;
// SrcReg2 is 0 when the second compare operand is not a register.
unsigned SrcReg = Compare->getOperand(0).getReg();
unsigned SrcReg2 = (Compare->getOperand(1).isReg() ?
Compare->getOperand(1).getReg() : 0);
const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
MachineBasicBlock *MBB = Compare->getParent();
// Walk forward from the instruction after Compare to the end of the block.
MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->end();
for (++MBBI; MBBI != MBBE; ++MBBI) {
// Only fuse into a BRC whose CC input is not needed by anything later.
if (MBBI->getOpcode() == SystemZ::BRC && !isCCLiveAfter(MBBI, TRI)) {
// Read the branch mask and target.
// These are copies, so they survive the operand removal below.
MachineOperand CCMask(MBBI->getOperand(0));
MachineOperand Target(MBBI->getOperand(1));
// Clear out all current operands.
// The CC use goes first because its index was computed before any
// removal; the explicit operands are then removed from index 1 down to 0.
int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
assert(CCUse >= 0 && "BRC must use CC");
MBBI->RemoveOperand(CCUse);
MBBI->RemoveOperand(1);
MBBI->RemoveOperand(0);
// Rebuild MBBI as a fused compare and branch.
// New operand order: both compare operands, then the mask, then the target.
MBBI->setDesc(TII->get(FusedOpcode));
MachineInstrBuilder(*MBB->getParent(), MBBI)
.addOperand(Compare->getOperand(0))
.addOperand(Compare->getOperand(1))
.addOperand(CCMask)
.addOperand(Target);
// Clear any intervening kills of SrcReg and SrcReg2.
// The scan restarts after Compare and runs to the end of the block, so it
// also covers the rewritten branch and anything that follows it.
MBBI = Compare;
for (++MBBI; MBBI != MBBE; ++MBBI) {
MBBI->clearRegisterKills(SrcReg, TRI);
if (SrcReg2)
MBBI->clearRegisterKills(SrcReg2, TRI);
}
return true;
}
// Stop if we find another reference to CC before a branch.
if (MBBI->readsRegister(SystemZ::CC, TRI) ||
MBBI->modifiesRegister(SystemZ::CC, TRI))
return false;
// Stop if we find another assignment to the registers before the branch.
if (MBBI->modifiesRegister(SrcReg, TRI) ||
(SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
return false;
}
// Reached the end of the block without finding a usable branch.
return false;
}
// Fill MBBs and Terminators, setting the addresses on the assumption
// that no branches need relaxation. Return the size of the function under
// this assumption.
@ -268,8 +363,12 @@ uint64_t SystemZLongBranch::initMBBInfo() {
MachineBasicBlock::iterator MI = MBB->begin();
MachineBasicBlock::iterator End = MBB->end();
while (MI != End && !MI->isTerminator()) {
Block.Size += TII->getInstSizeInBytes(MI);
MachineInstr *Current = MI;
++MI;
if (Current->isCompare() && fuseCompareAndBranch(Current))
Current->removeFromParent();
else
Block.Size += TII->getInstSizeInBytes(Current);
}
skipNonTerminators(Position, Block);

View File

@ -0,0 +1,45 @@
; Test SystemZInstrInfo::AnalyzeBranch and SystemZInstrInfo::InsertBranch.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare void @foo() noreturn
; Check a case where a separate branch is needed and where the original
; order should be reversed.
;
; The unsigned comparison is expected to stay as a separate CLR, followed by
; a conditional jump to the block that calls foo; the return is expected on
; the fall-through path.
define i32 @f1(i32 %a, i32 %b) {
; CHECK-LABEL: f1:
; CHECK: clr %r2, %r3
; CHECK: jnhe .L[[LABEL:.*]]
; CHECK: br %r14
; CHECK: .L[[LABEL]]:
; CHECK: brasl %r14, foo@PLT
entry:
%cmp = icmp ult i32 %a, %b
br i1 %cmp, label %callit, label %return
callit:
call void @foo()
unreachable
return:
ret i32 1
}
; Same again with a fused compare and branch.
;
; The equality test against zero is expected to become a single fused
; compare-immediate-and-jump (cijnlh) to the block that calls foo, with the
; return on the fall-through path.
define i32 @f2(i32 %a) {
; CHECK-LABEL: f2:
; CHECK: cijnlh %r2, 0, .L[[LABEL:.*]]
; CHECK: br %r14
; CHECK: .L[[LABEL]]:
; CHECK: brasl %r14, foo@PLT
entry:
%cmp = icmp eq i32 %a, 0
br i1 %cmp, label %callit, label %return
callit:
call void @foo()
unreachable
return:
ret i32 1
}