mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-12 17:32:19 +00:00
[SystemZ] Use BRCT and BRCTG to eliminate add-&-compare sequences
This patch just uses a peephole test for "add; compare; branch" sequences within a single block. The IR optimizers already convert loops to decrement-and-branch-on-nonzero form in some cases, so even this simplistic test triggers many times during a clang bootstrap and projects/test-suite run. It looks like there are still cases where we need to more strongly prefer branches on nonzero though. E.g. I saw a case where a loop that started out with a check for 0 ended up with a check for -1. I'll try to look at that sometime. I ended up adding the Reference class because MachineInstr::readsRegister() doesn't check for subregisters (by design, as far as I could tell). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187723 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f8e16c6f5a
commit
9379557478
@ -28,10 +28,38 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
STATISTIC(BranchOnCounts, "Number of branch-on-count instructions");
|
||||
STATISTIC(EliminatedComparisons, "Number of eliminated comparisons");
|
||||
STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions");
|
||||
|
||||
namespace {
|
||||
// Represents the references to a particular register in one or more
|
||||
// instructions.
|
||||
// A set of four flags summarising how a register is referenced.  Summaries
// gathered from several instructions can be combined with operator|=.
struct Reference {
|
||||
// Start with no references recorded.
Reference()
|
||||
: Def(false), Use(false), IndirectDef(false), IndirectUse(false) {}
|
||||
|
||||
// Merge Other into this summary: each flag ends up set if it was set in
// either summary.
Reference &operator|=(const Reference &Other) {
|
||||
Def |= Other.Def;
|
||||
IndirectDef |= Other.IndirectDef;
|
||||
Use |= Other.Use;
|
||||
IndirectUse |= Other.IndirectUse;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// True if any definition or use has been recorded.  Only Def and Use are
// tested; the Indirect* flags are not consulted here.
operator bool() const { return Def || Use; }
|
||||
|
||||
// True if the register is defined or used in some form, either directly or
|
||||
// via a sub- or super-register.
|
||||
bool Def;
|
||||
bool Use;
|
||||
|
||||
// True if the register is defined or used indirectly, by a sub- or
|
||||
// super-register.
|
||||
bool IndirectDef;
|
||||
bool IndirectUse;
|
||||
};
|
||||
|
||||
class SystemZElimCompare : public MachineFunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
@ -46,6 +74,9 @@ namespace {
|
||||
bool runOnMachineFunction(MachineFunction &F);
|
||||
|
||||
private:
|
||||
Reference getRegReferences(MachineInstr *MI, unsigned Reg);
|
||||
bool convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
|
||||
SmallVectorImpl<MachineInstr *> &CCUsers);
|
||||
bool convertToLoadAndTest(MachineInstr *MI);
|
||||
bool adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
|
||||
SmallVectorImpl<MachineInstr *> &CCUsers);
|
||||
@ -99,6 +130,80 @@ static bool resultTests(MachineInstr *MI, unsigned Reg, unsigned SubReg) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Describe the references to Reg in MI, including sub- and super-registers.
|
||||
// Walk every operand of MI and return a Reference summarising the operands
// that name Reg itself or a register that overlaps Reg.
Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
|
||||
Reference Ref;
|
||||
for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
|
||||
const MachineOperand &MO = MI->getOperand(I);
|
||||
// Only register operands are of interest.
if (MO.isReg()) {
|
||||
// Register number 0 is skipped (presumably "no register" -- confirm).
if (unsigned MOReg = MO.getReg()) {
|
||||
// Accept an exact match or any register that TRI reports as
// overlapping Reg.
if (MOReg == Reg || TRI->regsOverlap(MOReg, Reg)) {
|
||||
if (MO.isUse()) {
|
||||
Ref.Use = true;
|
||||
// A match through a different register number is an indirect use.
Ref.IndirectUse |= (MOReg != Reg);
|
||||
}
|
||||
if (MO.isDef()) {
|
||||
Ref.Def = true;
|
||||
// Likewise, a different register number makes this an indirect def.
Ref.IndirectDef |= (MOReg != Reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return Ref;
|
||||
}
|
||||
|
||||
// Compare compares the result of MI against zero. If MI is an addition
|
||||
// of -1 and if CCUsers is a single branch on nonzero, eliminate the addition
|
||||
// and convert the branch to a BRCT(G). Return true on success.
|
||||
// Try to fold the add MI, the compare Compare and the single branch in
// CCUsers into one BRCT or BRCTG.  Returns false, leaving everything
// untouched, if any of the checks below fails.  On success the branch is
// rewritten in place, MI is removed from its block and true is returned;
// Compare itself is not modified here.
bool
|
||||
SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
|
||||
SmallVectorImpl<MachineInstr *> &CCUsers) {
|
||||
// Check whether we have an addition of -1.
|
||||
unsigned Opcode = MI->getOpcode();
|
||||
// Opcode of the replacement branch: BRCT for AHI, BRCTG for AGHI.
unsigned BRCT;
|
||||
if (Opcode == SystemZ::AHI)
|
||||
BRCT = SystemZ::BRCT;
|
||||
else if (Opcode == SystemZ::AGHI)
|
||||
BRCT = SystemZ::BRCTG;
|
||||
else
|
||||
return false;
|
||||
// Operand 2 of the add is the immediate; it must be exactly -1.
if (MI->getOperand(2).getImm() != -1)
|
||||
return false;
|
||||
|
||||
// Check whether we have a single JLH, i.e. exactly one CC user and that
// user is a BRC whose first two operands are CCMASK_ICMP and CCMASK_CMP_NE.
|
||||
if (CCUsers.size() != 1)
|
||||
return false;
|
||||
MachineInstr *Branch = CCUsers[0];
|
||||
if (Branch->getOpcode() != SystemZ::BRC ||
|
||||
Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP ||
|
||||
Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_NE)
|
||||
return false;
|
||||
|
||||
// We already know that there are no references to the register between
|
||||
// MI and Compare.  Make sure that there are also no references between
|
||||
// Compare and Branch.
|
||||
unsigned SrcReg = Compare->getOperand(0).getReg();
|
||||
MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
|
||||
// Start at the instruction after Compare and stop just before Branch.
for (++MBBI; MBBI != MBBE; ++MBBI)
|
||||
if (getRegReferences(MBBI, SrcReg))
|
||||
return false;
|
||||
|
||||
// The transformation is OK.  Rebuild Branch as a BRCT(G).
|
||||
// Copy the branch target first, because all three operands are removed
// below (highest index first).
MachineOperand Target(Branch->getOperand(2));
|
||||
Branch->RemoveOperand(2);
|
||||
Branch->RemoveOperand(1);
|
||||
Branch->RemoveOperand(0);
|
||||
Branch->setDesc(TII->get(BRCT));
|
||||
// New operand list: MI's operands 0 and 1, then the saved target, then an
// implicit definition of CC.
MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addOperand(Target)
|
||||
.addReg(SystemZ::CC, RegState::ImplicitDefine);
|
||||
// NOTE(review): removeFromParent() presumably only unlinks MI without
// freeing it -- confirm whether eraseFromParent() was intended.
MI->removeFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
// If MI is a load instruction, try to convert it into a LOAD AND TEST.
|
||||
// Return true on success.
|
||||
bool SystemZElimCompare::convertToLoadAndTest(MachineInstr *MI) {
|
||||
@ -210,21 +315,32 @@ optimizeCompareZero(MachineInstr *Compare,
|
||||
unsigned SrcSubReg = Compare->getOperand(0).getSubReg();
|
||||
MachineBasicBlock *MBB = Compare->getParent();
|
||||
MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->begin();
|
||||
bool SeenUseOfCC = false;
|
||||
Reference CCRefs;
|
||||
Reference SrcRefs;
|
||||
while (MBBI != MBBE) {
|
||||
--MBBI;
|
||||
MachineInstr *MI = MBBI;
|
||||
if (resultTests(MI, SrcReg, SrcSubReg) &&
|
||||
((!SeenUseOfCC && convertToLoadAndTest(MI)) ||
|
||||
adjustCCMasksForInstr(MI, Compare, CCUsers))) {
|
||||
EliminatedComparisons += 1;
|
||||
return true;
|
||||
if (resultTests(MI, SrcReg, SrcSubReg)) {
|
||||
// Try to remove both MI and Compare by converting a branch to BRCT(G).
|
||||
// We don't care in this case whether CC is modified between MI and
|
||||
// Compare.
|
||||
if (!CCRefs.Use && !SrcRefs && convertToBRCT(MI, Compare, CCUsers)) {
|
||||
BranchOnCounts += 1;
|
||||
return true;
|
||||
}
|
||||
// Try to eliminate Compare by reusing a CC result from MI.
|
||||
if ((!CCRefs && convertToLoadAndTest(MI)) ||
|
||||
(!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) {
|
||||
EliminatedComparisons += 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (MI->modifiesRegister(SrcReg, TRI) ||
|
||||
MI->modifiesRegister(SystemZ::CC, TRI))
|
||||
SrcRefs |= getRegReferences(MI, SrcReg);
|
||||
if (SrcRefs.Def)
|
||||
return false;
|
||||
CCRefs |= getRegReferences(MI, SystemZ::CC);
|
||||
if (CCRefs.Use && CCRefs.Def)
|
||||
return false;
|
||||
if (MI->readsRegister(SystemZ::CC, TRI))
|
||||
SeenUseOfCC = true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -316,13 +432,12 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock *MBB) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (MI->definesRegister(SystemZ::CC, TRI)) {
|
||||
Reference CCRefs(getRegReferences(MI, SystemZ::CC));
|
||||
if (CCRefs.Def) {
|
||||
CCUsers.clear();
|
||||
CompleteCCUsers = true;
|
||||
} else if (MI->modifiesRegister(SystemZ::CC, TRI))
|
||||
CompleteCCUsers = false;
|
||||
|
||||
if (CompleteCCUsers && MI->readsRegister(SystemZ::CC, TRI))
|
||||
CompleteCCUsers = !CCRefs.IndirectDef;
|
||||
}
|
||||
if (CompleteCCUsers && CCRefs.Use)
|
||||
CCUsers.push_back(MI);
|
||||
}
|
||||
return Changed;
|
||||
|
@ -684,6 +684,14 @@ SystemZInstrInfo::getBranchInfo(const MachineInstr *MI) const {
|
||||
MI->getOperand(0).getImm(),
|
||||
MI->getOperand(1).getImm(), &MI->getOperand(2));
|
||||
|
||||
case SystemZ::BRCT:
|
||||
return SystemZII::Branch(SystemZII::BranchCT, SystemZ::CCMASK_ICMP,
|
||||
SystemZ::CCMASK_CMP_NE, &MI->getOperand(2));
|
||||
|
||||
case SystemZ::BRCTG:
|
||||
return SystemZII::Branch(SystemZII::BranchCTG, SystemZ::CCMASK_ICMP,
|
||||
SystemZ::CCMASK_CMP_NE, &MI->getOperand(2));
|
||||
|
||||
case SystemZ::CIJ:
|
||||
case SystemZ::CRJ:
|
||||
return SystemZII::Branch(SystemZII::BranchC, SystemZ::CCMASK_ICMP,
|
||||
|
@ -69,7 +69,15 @@ namespace SystemZII {
|
||||
|
||||
// An instruction that performs a 64-bit signed comparison and branches
|
||||
// on the result.
|
||||
BranchCG
|
||||
BranchCG,
|
||||
|
||||
// An instruction that decrements a 32-bit register and branches if
|
||||
// the result is nonzero.
|
||||
BranchCT,
|
||||
|
||||
// An instruction that decrements a 64-bit register and branches if
|
||||
// the result is nonzero.
|
||||
BranchCTG
|
||||
};
|
||||
// Information about a branch instruction.
|
||||
struct Branch {
|
||||
|
@ -148,6 +148,7 @@ namespace {
|
||||
bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address);
|
||||
bool mustRelaxABranch();
|
||||
void setWorstCaseAddresses();
|
||||
void splitBranchOnCount(MachineInstr *MI, unsigned AddOpcode);
|
||||
void splitCompareBranch(MachineInstr *MI, unsigned CompareOpcode);
|
||||
void relaxBranch(TerminatorInfo &Terminator);
|
||||
void relaxBranches();
|
||||
@ -218,6 +219,11 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
|
||||
// Relaxes to BRCL, which is 2 bytes longer.
|
||||
Terminator.ExtraRelaxSize = 2;
|
||||
break;
|
||||
case SystemZ::BRCT:
|
||||
case SystemZ::BRCTG:
|
||||
// Relaxes to A(G)HI and BRCL, which is 6 bytes longer.
|
||||
Terminator.ExtraRelaxSize = 6;
|
||||
break;
|
||||
case SystemZ::CRJ:
|
||||
// Relaxes to a CR/BRCL sequence, which is 2 bytes longer.
|
||||
Terminator.ExtraRelaxSize = 2;
|
||||
@ -330,6 +336,25 @@ void SystemZLongBranch::setWorstCaseAddresses() {
|
||||
}
|
||||
}
|
||||
|
||||
// Split BRANCH ON COUNT MI into the addition given by AddOpcode followed
|
||||
// by a BRCL on the result.
|
||||
// Replace MI with two instructions built in front of it: an AddOpcode
// instruction that adds -1, then a BRCL that takes MI's operand 2 as its
// final operand.  MI is erased afterwards.
void SystemZLongBranch::splitBranchOnCount(MachineInstr *MI,
|
||||
unsigned AddOpcode) {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
// Both replacement instructions reuse MI's debug location.
DebugLoc DL = MI->getDebugLoc();
|
||||
// The add takes MI's operands 0 and 1 followed by the immediate -1.
BuildMI(*MBB, MI, DL, TII->get(AddOpcode))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addImm(-1);
|
||||
// The branch uses the CCMASK_ICMP / CCMASK_CMP_NE mask pair.
MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
|
||||
.addImm(SystemZ::CCMASK_ICMP)
|
||||
.addImm(SystemZ::CCMASK_CMP_NE)
|
||||
.addOperand(MI->getOperand(2));
|
||||
// The implicit use of CC is a killing use.
|
||||
BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
|
||||
// Split MI into the comparison given by CompareOpcode followed
|
||||
// by a BRCL on the result.
|
||||
void SystemZLongBranch::splitCompareBranch(MachineInstr *MI,
|
||||
@ -358,6 +383,12 @@ void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
|
||||
case SystemZ::BRC:
|
||||
Branch->setDesc(TII->get(SystemZ::BRCL));
|
||||
break;
|
||||
case SystemZ::BRCT:
|
||||
splitBranchOnCount(Branch, SystemZ::AHI);
|
||||
break;
|
||||
case SystemZ::BRCTG:
|
||||
splitBranchOnCount(Branch, SystemZ::AGHI);
|
||||
break;
|
||||
case SystemZ::CRJ:
|
||||
splitCompareBranch(Branch, SystemZ::CR);
|
||||
break;
|
||||
|
@ -82,6 +82,9 @@ bool SystemZPassConfig::addPreEmitPass() {
|
||||
// CC values (while still being worthwhile) and others that happen to make
|
||||
// the CC result more useful than it was originally.
|
||||
//
|
||||
// Another reason is that we only want to use BRANCH ON COUNT in cases
|
||||
// where we know that the count register is not going to be spilled.
|
||||
//
|
||||
// Doing it so late makes it more likely that a register will be reused
|
||||
// between the comparison and the branch, but it isn't clear whether
|
||||
// preventing that would be a win or not.
|
||||
|
68
test/CodeGen/SystemZ/Large/branch-range-07.py
Normal file
68
test/CodeGen/SystemZ/Large/branch-range-07.py
Normal file
@ -0,0 +1,68 @@
|
||||
# Test 32-bit BRANCH RELATIVE ON COUNT in cases where some branches are out
|
||||
# of range.
|
||||
# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
|
||||
|
||||
# Construct:
|
||||
#
|
||||
# loopN:
|
||||
# load of countN
|
||||
# ...
|
||||
# loop0:
|
||||
# 0xffd8 bytes, from MVIY instructions
|
||||
# conditional branch to main
|
||||
# after0:
|
||||
# ...
|
||||
# decrement of countN
|
||||
# conditional branch to loopN
|
||||
# afterN:
|
||||
#
|
||||
# Each load occupies 4 bytes. Each decrement and branch occupies 4
|
||||
# bytes if BRCT can be used, otherwise it occupies 10 bytes (AHI + BRCL).
|
||||
# This means that loop 6 contains 5 * 4 + 0xffd8 + 5 * 4 == 0x10000 bytes
|
||||
# and is therefore (just) in range. Loop 7 is out of range.
|
||||
#
|
||||
# CHECK: brct {{%r[0-9]+}}
|
||||
# CHECK: brct {{%r[0-9]+}}
|
||||
# CHECK: brct {{%r[0-9]+}}
|
||||
# CHECK: brct {{%r[0-9]+}}
|
||||
# CHECK: brct {{%r[0-9]+}}
|
||||
# CHECK: brct {{%r[0-9]+}}
|
||||
# CHECK: ahi {{%r[0-9]+}}, -1
|
||||
# CHECK: jglh
|
||||
# CHECK: ahi {{%r[0-9]+}}, -1
|
||||
# CHECK: jglh
|
||||
|
||||
branch_blocks = 8
|
||||
main_size = 0xffd8
|
||||
|
||||
print 'define void @f1(i8 *%base, i32 *%counts) {'
|
||||
print 'entry:'
|
||||
|
||||
for i in xrange(branch_blocks - 1, -1, -1):
|
||||
print ' %%countptr%d = getelementptr i32 *%%counts, i64 %d' % (i, i)
|
||||
print ' %%initcount%d = load i32 *%%countptr%d' % (i, i)
|
||||
print ' br label %%loop%d' % i
|
||||
|
||||
print 'loop%d:' % i
|
||||
block1 = 'entry' if i == branch_blocks - 1 else 'loop%d' % (i + 1)
|
||||
block2 = 'loop0' if i == 0 else 'after%d' % (i - 1)
|
||||
print (' %%count%d = phi i32 [ %%initcount%d, %%%s ],'
|
||||
' [ %%nextcount%d, %%%s ]' % (i, i, block1, i, block2))
|
||||
|
||||
a, b = 1, 1
|
||||
for i in xrange(0, main_size, 6):
|
||||
a, b = b, a + b
|
||||
offset = 4096 + b % 500000
|
||||
value = a % 256
|
||||
print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
|
||||
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
|
||||
|
||||
for i in xrange(branch_blocks):
|
||||
print ' %%nextcount%d = add i32 %%count%d, -1' % (i, i)
|
||||
print ' %%test%d = icmp ne i32 %%nextcount%d, 0' % (i, i)
|
||||
print ' br i1 %%test%d, label %%loop%d, label %%after%d' % (i, i, i)
|
||||
print ''
|
||||
print 'after%d:' % i
|
||||
|
||||
print ' ret void'
|
||||
print '}'
|
69
test/CodeGen/SystemZ/Large/branch-range-08.py
Normal file
69
test/CodeGen/SystemZ/Large/branch-range-08.py
Normal file
@ -0,0 +1,69 @@
|
||||
# Test 64-bit BRANCH RELATIVE ON COUNT in cases where some branches are out
|
||||
# of range.
|
||||
# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
|
||||
|
||||
# Construct:
|
||||
#
|
||||
# loopN:
|
||||
# load of countN
|
||||
# ...
|
||||
# loop0:
|
||||
# 0xffd8 bytes, from MVIY instructions
|
||||
# conditional branch to main
|
||||
# after0:
|
||||
# ...
|
||||
# decrement of countN
|
||||
# conditional branch to loopN
|
||||
# afterN:
|
||||
#
|
||||
# Each load occupies 6 bytes. Each decrement and branch occupies 4
|
||||
# bytes if BRCTG can be used, otherwise it occupies 10 bytes (AGHI + BRCL).
|
||||
# This means that loop 5 contains 4 * 6 + 0xffd8 + 4 * 4 == 0x10000 bytes
|
||||
# and is therefore (just) in range. Loop 6 is out of range.
|
||||
#
|
||||
# CHECK: brctg {{%r[0-9]+}}
|
||||
# CHECK: brctg {{%r[0-9]+}}
|
||||
# CHECK: brctg {{%r[0-9]+}}
|
||||
# CHECK: brctg {{%r[0-9]+}}
|
||||
# CHECK: brctg {{%r[0-9]+}}
|
||||
# CHECK: aghi {{%r[0-9]+}}, -1
|
||||
# CHECK: jglh
|
||||
# CHECK: aghi {{%r[0-9]+}}, -1
|
||||
# CHECK: jglh
|
||||
# CHECK: aghi {{%r[0-9]+}}, -1
|
||||
# CHECK: jglh
|
||||
|
||||
branch_blocks = 8
|
||||
main_size = 0xffd8
|
||||
|
||||
print 'define void @f1(i8 *%base, i64 *%counts) {'
|
||||
print 'entry:'
|
||||
|
||||
for i in xrange(branch_blocks - 1, -1, -1):
|
||||
print ' %%countptr%d = getelementptr i64 *%%counts, i64 %d' % (i, i)
|
||||
print ' %%initcount%d = load i64 *%%countptr%d' % (i, i)
|
||||
print ' br label %%loop%d' % i
|
||||
|
||||
print 'loop%d:' % i
|
||||
block1 = 'entry' if i == branch_blocks - 1 else 'loop%d' % (i + 1)
|
||||
block2 = 'loop0' if i == 0 else 'after%d' % (i - 1)
|
||||
print (' %%count%d = phi i64 [ %%initcount%d, %%%s ],'
|
||||
' [ %%nextcount%d, %%%s ]' % (i, i, block1, i, block2))
|
||||
|
||||
a, b = 1, 1
|
||||
for i in xrange(0, main_size, 6):
|
||||
a, b = b, a + b
|
||||
offset = 4096 + b % 500000
|
||||
value = a % 256
|
||||
print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
|
||||
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
|
||||
|
||||
for i in xrange(branch_blocks):
|
||||
print ' %%nextcount%d = add i64 %%count%d, -1' % (i, i)
|
||||
print ' %%test%d = icmp ne i64 %%nextcount%d, 0' % (i, i)
|
||||
print ' br i1 %%test%d, label %%loop%d, label %%after%d' % (i, i, i)
|
||||
print ''
|
||||
print 'after%d:' % i
|
||||
|
||||
print ' ret void'
|
||||
print '}'
|
@ -5,7 +5,7 @@
|
||||
; Test that strength reduction is applied to addresses with a scale factor,
|
||||
; but that indexed addressing can still be used.
|
||||
define void @f1(i32 *%dest, i32 %a) {
|
||||
; CHECK-LABEL: f1
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK-NOT: sllg
|
||||
; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}})
|
||||
; CHECK: br %r14
|
||||
@ -23,3 +23,102 @@ loop:
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test a loop that should be converted into dbr form and then use BRCT.
|
||||
define void @f2(i32 *%src, i32 *%dest) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: lhi [[REG:%r[0-5]]], 100
|
||||
; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop
|
||||
; CHECK: brct [[REG]], [[LABEL]]
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%count = phi i32 [ 0, %entry ], [ %next, %loop.next ]
|
||||
%next = add i32 %count, 1
|
||||
%val = load volatile i32 *%src
|
||||
%cmp = icmp eq i32 %val, 0
|
||||
br i1 %cmp, label %loop.next, label %loop.store
|
||||
|
||||
loop.store:
|
||||
%add = add i32 %val, 1
|
||||
store volatile i32 %add, i32 *%dest
|
||||
br label %loop.next
|
||||
|
||||
loop.next:
|
||||
%cont = icmp ne i32 %next, 100
|
||||
br i1 %cont, label %loop, label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Like f2, but for BRCTG.
|
||||
define void @f3(i64 *%src, i64 *%dest) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: lghi [[REG:%r[0-5]]], 100
|
||||
; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop
|
||||
; CHECK: brctg [[REG]], [[LABEL]]
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%count = phi i64 [ 0, %entry ], [ %next, %loop.next ]
|
||||
%next = add i64 %count, 1
|
||||
%val = load volatile i64 *%src
|
||||
%cmp = icmp eq i64 %val, 0
|
||||
br i1 %cmp, label %loop.next, label %loop.store
|
||||
|
||||
loop.store:
|
||||
%add = add i64 %val, 1
|
||||
store volatile i64 %add, i64 *%dest
|
||||
br label %loop.next
|
||||
|
||||
loop.next:
|
||||
%cont = icmp ne i64 %next, 100
|
||||
br i1 %cont, label %loop, label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test a loop with a 64-bit decremented counter in which the 32-bit
|
||||
; low part of the counter is used after the decrement. This is an example
|
||||
; of a subregister use being the only thing that blocks a conversion to BRCTG.
|
||||
define void @f4(i32 *%src, i32 *%dest, i64 *%dest2, i64 %count) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: aghi [[REG:%r[0-5]]], -1
|
||||
; CHECK: lr [[REG2:%r[0-5]]], [[REG]]
|
||||
; CHECK: stg [[REG2]],
|
||||
; CHECK: jne {{\..*}}
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%left = phi i64 [ %count, %entry ], [ %next, %loop.next ]
|
||||
store volatile i64 %left, i64 *%dest2
|
||||
%val = load volatile i32 *%src
|
||||
%cmp = icmp eq i32 %val, 0
|
||||
br i1 %cmp, label %loop.next, label %loop.store
|
||||
|
||||
loop.store:
|
||||
%add = add i32 %val, 1
|
||||
store volatile i32 %add, i32 *%dest
|
||||
br label %loop.next
|
||||
|
||||
loop.next:
|
||||
%next = add i64 %left, -1
|
||||
%ext = zext i32 %val to i64
|
||||
%shl = shl i64 %ext, 32
|
||||
%and = and i64 %next, 4294967295
|
||||
%or = or i64 %shl, %and
|
||||
store volatile i64 %or, i64 *%dest2
|
||||
%cont = icmp ne i64 %next, 0
|
||||
br i1 %cont, label %loop, label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user