[Hexagon] Update AnalyzeBranch, etc target hooks

Improved the AnalyzeBranch, InsertBranch, and RemoveBranch
functions in order to handle more of our branch instructions.
This requires changes to analyzeCompare and PredicateInstructions.
Specifically, we've added support for new value compare jumps,
improved handling of endloop, added more compare instructions,
and improved support for predicate instructions.

Differential Revision: http://reviews.llvm.org/D9559


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236876 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Brendon Cahoon 2015-05-08 16:16:29 +00:00
parent d7e20e7be8
commit 7fd56b1e4a
5 changed files with 1003 additions and 272 deletions

View File

@ -369,10 +369,10 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
if (NotAnalyzed) if (NotAnalyzed)
return false; return false;
unsigned CSz = Cond.size(); unsigned PredR, PredPos, PredRegFlags;
assert (CSz == 1 || CSz == 2); if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
unsigned PredR = Cond[CSz-1].getReg(); return false;
MachineInstr *PredI = MRI->getVRegDef(PredR); MachineInstr *PredI = MRI->getVRegDef(PredR);
if (!PredI->isCompare()) if (!PredI->isCompare())
@ -491,8 +491,10 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
// to put imm(0), followed by P in the vector Cond. // to put imm(0), followed by P in the vector Cond.
// If TB is not the header, it means that the "not-taken" path must lead // If TB is not the header, it means that the "not-taken" path must lead
// to the header. // to the header.
bool Negated = (Cond.size() > 1) ^ (TB != Header); bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
unsigned PredReg = Cond[Cond.size()-1].getReg(); unsigned PredReg, PredPos, PredRegFlags;
if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
return nullptr;
MachineInstr *CondI = MRI->getVRegDef(PredReg); MachineInstr *CondI = MRI->getVRegDef(PredReg);
unsigned CondOpc = CondI->getOpcode(); unsigned CondOpc = CondI->getOpcode();

View File

@ -115,72 +115,172 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0; return 0;
} }
// Find the hardware loop instruction used to set up the specified loop.
// On Hexagon, two instructions set up a hardware loop (LOOP0, LOOP1), each
// with a corresponding instruction (ENDLOOP0, ENDLOOP1) that marks the end
// of the loop.
static MachineInstr *
findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
int LOOPi;
int LOOPr;
if (EndLoopOp == Hexagon::ENDLOOP0) {
LOOPi = Hexagon::J2_loop0i;
LOOPr = Hexagon::J2_loop0r;
} else { // EndLoopOp == Hexagon::EndLOOP1
LOOPi = Hexagon::J2_loop1i;
LOOPr = Hexagon::J2_loop1r;
}
unsigned // The loop set-up instruction will be in a predecessor block
HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, for (MachineBasicBlock::pred_iterator PB = BB->pred_begin(),
MachineBasicBlock *FBB, PE = BB->pred_end(); PB != PE; ++PB) {
const SmallVectorImpl<MachineOperand> &Cond, // If this has been visited, already skip it.
DebugLoc DL) const{ if (!Visited.insert(*PB).second)
continue;
int BOpc = Hexagon::J2_jump; if (*PB == BB)
int BccOpc = Hexagon::J2_jumpt; continue;
for (MachineBasicBlock::reverse_instr_iterator I = (*PB)->instr_rbegin(),
assert(TBB && "InsertBranch must not be told to insert a fallthrough"); E = (*PB)->instr_rend(); I != E; ++I) {
int Opc = I->getOpcode();
int regPos = 0; if (Opc == LOOPi || Opc == LOOPr)
// Check if ReverseBranchCondition has asked to reverse this branch return &*I;
// If we want to reverse the branch an odd number of times, we want // We've reached a different loop, which means the loop0 has been removed.
// JMP_f. if (Opc == EndLoopOp)
if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) { return 0;
BccOpc = Hexagon::J2_jumpf;
regPos = 1;
} }
// Check the predecessors for the LOOP instruction.
MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited);
if (loop)
return loop;
}
return 0;
}
if (!FBB) { unsigned HexagonInstrInfo::InsertBranch(
if (Cond.empty()) { MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB,
// Due to a bug in TailMerging/CFG Optimization, we need to add a const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
// special case handling of a predicated jump followed by an
// unconditional jump. If not, Tail Merging and CFG Optimization go Opcode_t BOpc = Hexagon::J2_jump;
// into an infinite loop. Opcode_t BccOpc = Hexagon::J2_jumpt;
MachineBasicBlock *NewTBB, *NewFBB;
SmallVector<MachineOperand, 4> Cond; assert(TBB && "InsertBranch must not be told to insert a fallthrough");
MachineInstr *Term = MBB.getFirstTerminator();
if (isPredicated(Term) && !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, // Check if ReverseBranchCondition has asked to reverse this branch
false)) { // If we want to reverse the branch an odd number of times, we want
MachineBasicBlock *NextBB = // J2_jumpf.
std::next(MachineFunction::iterator(&MBB)); if (!Cond.empty() && Cond[0].isImm())
if (NewTBB == NextBB) { BccOpc = Cond[0].getImm();
ReverseBranchCondition(Cond);
RemoveBranch(MBB); if (!FBB) {
return InsertBranch(MBB, TBB, nullptr, Cond, DL); if (Cond.empty()) {
} // Due to a bug in TailMerging/CFG Optimization, we need to add a
// special case handling of a predicated jump followed by an
// unconditional jump. If not, Tail Merging and CFG Optimization go
// into an infinite loop.
MachineBasicBlock *NewTBB, *NewFBB;
SmallVector<MachineOperand, 4> Cond;
MachineInstr *Term = MBB.getFirstTerminator();
if (Term != MBB.end() && isPredicated(Term) &&
!AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
MachineBasicBlock *NextBB =
std::next(MachineFunction::iterator(&MBB));
if (NewTBB == NextBB) {
ReverseBranchCondition(Cond);
RemoveBranch(MBB);
return InsertBranch(MBB, TBB, nullptr, Cond, DL);
} }
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
} else {
// If Cond[0] is a basic block, insert ENDLOOP0.
if (Cond[0].isMBB())
BuildMI(&MBB, DL, get(Hexagon::ENDLOOP0)).addMBB(Cond[0].getMBB());
else
BuildMI(&MBB, DL,
get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
} }
return 1; BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
} else if (isEndLoopN(Cond[0].getImm())) {
int EndLoopOp = Cond[0].getImm();
assert(Cond[1].isMBB());
// Since we're adding an ENDLOOP, there better be a LOOP instruction.
// Check for it, and change the BB target if needed.
SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
Loop->getOperand(0).setMBB(TBB);
// Add the ENDLOOP after finding the LOOP instruction.
BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
} else if (isNewValueJump(Cond[0].getImm())) {
assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump");
// New value jump
// (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
// (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber(););
if (Cond[2].isReg()) {
unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
addReg(Cond[2].getReg(), Flags2).addMBB(TBB);
} else if(Cond[2].isImm()) {
BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
addImm(Cond[2].getImm()).addMBB(TBB);
} else
llvm_unreachable("Invalid condition for branching");
} else {
assert((Cond.size() == 2) && "Malformed cond vector");
const MachineOperand &RO = Cond[1];
unsigned Flags = getUndefRegState(RO.isUndef());
BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
} }
return 1;
}
assert((!Cond.empty()) &&
"Cond. cannot be empty when multiple branchings are required");
assert((!isNewValueJump(Cond[0].getImm())) &&
"NV-jump cannot be inserted with another branch");
// Special case for hardware loops. The condition is a basic block.
if (isEndLoopN(Cond[0].getImm())) {
int EndLoopOp = Cond[0].getImm();
assert(Cond[1].isMBB());
// Since we're adding an ENDLOOP, there better be a LOOP instruction.
// Check for it, and change the BB target if needed.
SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
Loop->getOperand(0).setMBB(TBB);
// Add the ENDLOOP after finding the LOOP instruction.
BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
} else {
const MachineOperand &RO = Cond[1];
unsigned Flags = getUndefRegState(RO.isUndef());
BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
}
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
// We don't handle ENDLOOP0 with a conditional branch in AnalyzeBranch. return 2;
BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
return 2;
} }
/// This function can analyze one/two way branching only and should (mostly) be
/// called by the target-independent side.
/// The first entry is always the opcode of the branching instruction, except
/// when the Cond vector is supposed to be empty, e.g., when AnalyzeBranch fails
/// or the BB ends with only an unconditional jump. Subsequent entries depend
/// upon the opcode,
/// e.g. Jump_c p will have
/// Cond[0] = Jump_c
/// Cond[1] = p
/// HW-loop ENDLOOP:
/// Cond[0] = ENDLOOP
/// Cond[1] = MBB
/// New value jump:
/// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode
/// Cond[1] = R
/// Cond[2] = Imm
/// @note Related function is \fn findInstrPredicate which fills in
/// Cond. vector when a predicated instruction is passed to it.
/// We follow same protocol in that case too.
///
bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB, MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond, SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const { bool AllowModify) const {
TBB = nullptr; TBB = nullptr;
FBB = nullptr; FBB = nullptr;
Cond.clear();
// If the block has no terminators, it just falls into the block after it. // If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::instr_iterator I = MBB.instr_end(); MachineBasicBlock::instr_iterator I = MBB.instr_end();
@ -202,6 +302,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
do { do {
--I; --I;
if (I->isEHLabel()) if (I->isEHLabel())
// Don't analyze EH branches.
return true; return true;
} while (I != MBB.instr_begin()); } while (I != MBB.instr_begin());
@ -216,7 +317,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump && bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump &&
I->getOperand(0).isMBB(); I->getOperand(0).isMBB();
// Delete the JMP if it's equivalent to a fall-through. // Delete the J2_jump if it's equivalent to a fall-through.
if (AllowModify && JumpToBlock && if (AllowModify && JumpToBlock &&
MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
DEBUG(dbgs()<< "\nErasing the jump to successor block\n";); DEBUG(dbgs()<< "\nErasing the jump to successor block\n";);
@ -257,7 +358,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return true; return true;
bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode); bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode);
bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode); bool LastOpcodeHasNVJump = isNewValueJump(LastInst);
// If there is only one terminator instruction, process it. // If there is only one terminator instruction, process it.
if (LastInst && !SecondLastInst) { if (LastInst && !SecondLastInst) {
@ -265,34 +366,54 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
TBB = LastInst->getOperand(0).getMBB(); TBB = LastInst->getOperand(0).getMBB();
return false; return false;
} }
if (LastOpcode == Hexagon::ENDLOOP0) { if (isEndLoopN(LastOpcode)) {
TBB = LastInst->getOperand(0).getMBB(); TBB = LastInst->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0)); Cond.push_back(LastInst->getOperand(0));
return false; return false;
} }
if (LastOpcodeHasJMP_c) { if (LastOpcodeHasJMP_c) {
TBB = LastInst->getOperand(1).getMBB(); TBB = LastInst->getOperand(1).getMBB();
if (LastOpcodeHasNot) { Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(MachineOperand::CreateImm(0));
}
Cond.push_back(LastInst->getOperand(0)); Cond.push_back(LastInst->getOperand(0));
return false; return false;
} }
// Only supporting rr/ri versions of new-value jumps.
if (LastOpcodeHasNVJump && (LastInst->getNumExplicitOperands() == 3)) {
TBB = LastInst->getOperand(2).getMBB();
Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0));
Cond.push_back(LastInst->getOperand(1));
return false;
}
DEBUG(dbgs() << "\nCant analyze BB#" << MBB.getNumber()
<< " with one jump\n";);
// Otherwise, don't know what this is. // Otherwise, don't know what this is.
return true; return true;
} }
bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode); bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode);
bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode); bool SecLastOpcodeHasNVJump = isNewValueJump(SecondLastInst);
if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) { if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) {
TBB = SecondLastInst->getOperand(1).getMBB(); TBB = SecondLastInst->getOperand(1).getMBB();
if (SecLastOpcodeHasNot) Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
Cond.push_back(MachineOperand::CreateImm(0));
Cond.push_back(SecondLastInst->getOperand(0)); Cond.push_back(SecondLastInst->getOperand(0));
FBB = LastInst->getOperand(0).getMBB(); FBB = LastInst->getOperand(0).getMBB();
return false; return false;
} }
// Only supporting rr/ri versions of new-value jumps.
if (SecLastOpcodeHasNVJump &&
(SecondLastInst->getNumExplicitOperands() == 3) &&
(LastOpcode == Hexagon::J2_jump)) {
TBB = SecondLastInst->getOperand(2).getMBB();
Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
Cond.push_back(SecondLastInst->getOperand(0));
Cond.push_back(SecondLastInst->getOperand(1));
FBB = LastInst->getOperand(0).getMBB();
return false;
}
// If the block ends with two Hexagon:JMPs, handle it. The second one is not // If the block ends with two Hexagon:JMPs, handle it. The second one is not
// executed, so remove it. // executed, so remove it.
if (SecLastOpcode == Hexagon::J2_jump && LastOpcode == Hexagon::J2_jump) { if (SecLastOpcode == Hexagon::J2_jump && LastOpcode == Hexagon::J2_jump) {
@ -303,53 +424,40 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false; return false;
} }
// If the block ends with an ENDLOOP, and JMP, handle it. // If the block ends with an ENDLOOP, and J2_jump, handle it.
if (SecLastOpcode == Hexagon::ENDLOOP0 && if (isEndLoopN(SecLastOpcode) && LastOpcode == Hexagon::J2_jump) {
LastOpcode == Hexagon::J2_jump) {
TBB = SecondLastInst->getOperand(0).getMBB(); TBB = SecondLastInst->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
Cond.push_back(SecondLastInst->getOperand(0)); Cond.push_back(SecondLastInst->getOperand(0));
FBB = LastInst->getOperand(0).getMBB(); FBB = LastInst->getOperand(0).getMBB();
return false; return false;
} }
DEBUG(dbgs() << "\nCant analyze BB#" << MBB.getNumber()
<< " with two jumps";);
// Otherwise, can't handle this. // Otherwise, can't handle this.
return true; return true;
} }
unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber());
MachineBasicBlock::iterator I = MBB.end(); MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0; unsigned Count = 0;
--I; while (I != MBB.begin()) {
unsigned Opc1 = I->getOpcode(); --I;
switch (Opc1) { if (I->isDebugValue())
case Hexagon::J2_jump: continue;
case Hexagon::J2_jumpt: // Only removing branches from end of MBB.
case Hexagon::J2_jumpf: if (!I->isBranch())
case Hexagon::ENDLOOP0: return Count;
I->eraseFromParent(); if (Count && (I->getOpcode() == Hexagon::J2_jump))
break; llvm_unreachable("Malformed basic block: unconditional branch not last");
default: MBB.erase(&MBB.back());
return 0; I = MBB.end();
} ++Count;
I = MBB.end();
if (I == MBB.begin()) return 1;
--I;
unsigned Opc2 = I->getOpcode();
switch (Opc2) {
case Hexagon::J2_jumpt:
case Hexagon::J2_jumpf:
case Hexagon::ENDLOOP0:
I->eraseFromParent();
return 2;
default:
return 1;
} }
return Count;
} }
/// \brief For a comparison instruction, return the source registers in /// \brief For a comparison instruction, return the source registers in
/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it /// \p SrcReg and \p SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction /// compares against in CmpValue. Return true if the comparison instruction
@ -361,31 +469,39 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
// Set mask and the first source register. // Set mask and the first source register.
switch (Opc) { switch (Opc) {
case Hexagon::C2_cmpeqp:
case Hexagon::C2_cmpeqi:
case Hexagon::C2_cmpeq: case Hexagon::C2_cmpeq:
case Hexagon::C2_cmpgtp: case Hexagon::C2_cmpeqp:
case Hexagon::C2_cmpgtup:
case Hexagon::C2_cmpgtui:
case Hexagon::C2_cmpgtu:
case Hexagon::C2_cmpgti:
case Hexagon::C2_cmpgt: case Hexagon::C2_cmpgt:
case Hexagon::C2_cmpgtp:
case Hexagon::C2_cmpgtu:
case Hexagon::C2_cmpgtup:
case Hexagon::C4_cmpneq:
case Hexagon::C4_cmplte:
case Hexagon::C4_cmplteu:
case Hexagon::C2_cmpeqi:
case Hexagon::C2_cmpgti:
case Hexagon::C2_cmpgtui:
case Hexagon::C4_cmpneqi:
case Hexagon::C4_cmplteui:
case Hexagon::C4_cmpltei:
SrcReg = MI->getOperand(1).getReg(); SrcReg = MI->getOperand(1).getReg();
Mask = ~0; Mask = ~0;
break; break;
case Hexagon::A4_cmpbeqi:
case Hexagon::A4_cmpbeq: case Hexagon::A4_cmpbeq:
case Hexagon::A4_cmpbgtui:
case Hexagon::A4_cmpbgtu:
case Hexagon::A4_cmpbgt: case Hexagon::A4_cmpbgt:
case Hexagon::A4_cmpbgtu:
case Hexagon::A4_cmpbeqi:
case Hexagon::A4_cmpbgti:
case Hexagon::A4_cmpbgtui:
SrcReg = MI->getOperand(1).getReg(); SrcReg = MI->getOperand(1).getReg();
Mask = 0xFF; Mask = 0xFF;
break; break;
case Hexagon::A4_cmpheqi:
case Hexagon::A4_cmpheq: case Hexagon::A4_cmpheq:
case Hexagon::A4_cmphgtui:
case Hexagon::A4_cmphgtu:
case Hexagon::A4_cmphgt: case Hexagon::A4_cmphgt:
case Hexagon::A4_cmphgtu:
case Hexagon::A4_cmpheqi:
case Hexagon::A4_cmphgti:
case Hexagon::A4_cmphgtui:
SrcReg = MI->getOperand(1).getReg(); SrcReg = MI->getOperand(1).getReg();
Mask = 0xFFFF; Mask = 0xFFFF;
break; break;
@ -393,27 +509,35 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
// Set the value/second source register. // Set the value/second source register.
switch (Opc) { switch (Opc) {
case Hexagon::C2_cmpeqp:
case Hexagon::C2_cmpeq: case Hexagon::C2_cmpeq:
case Hexagon::C2_cmpgtp: case Hexagon::C2_cmpeqp:
case Hexagon::C2_cmpgtup:
case Hexagon::C2_cmpgtu:
case Hexagon::C2_cmpgt: case Hexagon::C2_cmpgt:
case Hexagon::C2_cmpgtp:
case Hexagon::C2_cmpgtu:
case Hexagon::C2_cmpgtup:
case Hexagon::A4_cmpbeq: case Hexagon::A4_cmpbeq:
case Hexagon::A4_cmpbgtu:
case Hexagon::A4_cmpbgt: case Hexagon::A4_cmpbgt:
case Hexagon::A4_cmpbgtu:
case Hexagon::A4_cmpheq: case Hexagon::A4_cmpheq:
case Hexagon::A4_cmphgtu:
case Hexagon::A4_cmphgt: case Hexagon::A4_cmphgt:
case Hexagon::A4_cmphgtu:
case Hexagon::C4_cmpneq:
case Hexagon::C4_cmplte:
case Hexagon::C4_cmplteu:
SrcReg2 = MI->getOperand(2).getReg(); SrcReg2 = MI->getOperand(2).getReg();
return true; return true;
case Hexagon::C2_cmpeqi: case Hexagon::C2_cmpeqi:
case Hexagon::C2_cmpgtui: case Hexagon::C2_cmpgtui:
case Hexagon::C2_cmpgti: case Hexagon::C2_cmpgti:
case Hexagon::C4_cmpneqi:
case Hexagon::C4_cmplteui:
case Hexagon::C4_cmpltei:
case Hexagon::A4_cmpbeqi: case Hexagon::A4_cmpbeqi:
case Hexagon::A4_cmpbgti:
case Hexagon::A4_cmpbgtui: case Hexagon::A4_cmpbgtui:
case Hexagon::A4_cmpheqi: case Hexagon::A4_cmpheqi:
case Hexagon::A4_cmphgti:
case Hexagon::A4_cmphgtui: case Hexagon::A4_cmphgtui:
SrcReg2 = 0; SrcReg2 = 0;
Value = MI->getOperand(2).getImm(); Value = MI->getOperand(2).getImm();
@ -731,6 +855,16 @@ bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
return false; return false;
} }
/// Return true if the new-value field of \p MI's target-specific flags
/// (selected by HexagonII::NewValuePos and HexagonII::NewValueMask) is
/// non-zero.
bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
  const uint64_t Flags = MI->getDesc().TSFlags;
  const uint64_t NewValueBits =
      (Flags >> HexagonII::NewValuePos) & HexagonII::NewValueMask;
  return NewValueBits != 0;
}
/// Opcode-based overload: return true if the new-value field of the
/// target-specific flags in the descriptor for \p Opcode (selected by
/// HexagonII::NewValuePos and HexagonII::NewValueMask) is non-zero.
bool HexagonInstrInfo::isNewValue(Opcode_t Opcode) const {
  const uint64_t Flags = get(Opcode).TSFlags;
  const uint64_t NewValueBits =
      (Flags >> HexagonII::NewValuePos) & HexagonII::NewValueMask;
  return NewValueBits != 0;
}
bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4; return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4;
} }
@ -881,148 +1015,51 @@ int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
bool HexagonInstrInfo:: bool HexagonInstrInfo::
PredicateInstruction(MachineInstr *MI, PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Cond) const { const SmallVectorImpl<MachineOperand> &Cond) const {
if (Cond.empty() || isEndLoopN(Cond[0].getImm())) {
DEBUG(dbgs() << "\nCannot predicate:"; MI->dump(););
return false;
}
int Opc = MI->getOpcode(); int Opc = MI->getOpcode();
assert (isPredicable(MI) && "Expected predicable instruction"); assert (isPredicable(MI) && "Expected predicable instruction");
bool invertJump = (!Cond.empty() && Cond[0].isImm() && bool invertJump = predOpcodeHasNot(Cond);
(Cond[0].getImm() == 0));
// This will change MI's opcode to its predicate version. // We have to predicate MI "in place", i.e. after this function returns,
// However, its operand list is still the old one, i.e. the // MI will need to be transformed into a predicated form. To avoid com-
// non-predicate one. // plicated manipulations with the operands (handling tied operands,
MI->setDesc(get(getCondOpcode(Opc, invertJump))); // etc.), build a new temporary instruction, then overwrite MI with it.
int oper = -1; MachineBasicBlock &B = *MI->getParent();
unsigned int GAIdx = 0; DebugLoc DL = MI->getDebugLoc();
unsigned PredOpc = getCondOpcode(Opc, invertJump);
// Indicates whether the current MI has a GlobalAddress operand MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc));
bool hasGAOpnd = false; unsigned NOp = 0, NumOps = MI->getNumOperands();
std::vector<MachineOperand> tmpOpnds; while (NOp < NumOps) {
MachineOperand &Op = MI->getOperand(NOp);
// Indicates whether we need to shift operands to right. if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
bool needShift = true; break;
T.addOperand(Op);
// The predicate is ALWAYS the FIRST input operand !!! NOp++;
if (MI->getNumOperands() == 0) {
// The non-predicate version of MI does not take any operands,
// i.e. no outs and no ins. In this condition, the predicate
// operand will be directly placed at Operands[0]. No operand
// shift is needed.
// Example: BARRIER
needShift = false;
oper = -1;
}
else if ( MI->getOperand(MI->getNumOperands()-1).isReg()
&& MI->getOperand(MI->getNumOperands()-1).isDef()
&& !MI->getOperand(MI->getNumOperands()-1).isImplicit()) {
// The non-predicate version of MI does not have any input operands.
// In this condition, we extend the length of Operands[] by one and
// copy the original last operand to the newly allocated slot.
// At this moment, it is just a place holder. Later, we will put
// predicate operand directly into it. No operand shift is needed.
// Example: r0=BARRIER (this is a faked insn used here for illustration)
MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
needShift = false;
oper = MI->getNumOperands() - 2;
}
else {
// We need to right shift all input operands by one. Duplicate the
// last operand into the newly allocated slot.
MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
} }
if (needShift) unsigned PredReg, PredRegPos, PredRegFlags;
{ bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags);
// Operands[ MI->getNumOperands() - 2 ] has been copied into (void)GotPredReg;
// Operands[ MI->getNumOperands() - 1 ], so we start from assert(GotPredReg);
// Operands[ MI->getNumOperands() - 3 ]. T.addReg(PredReg, PredRegFlags);
// oper is a signed int. while (NOp < NumOps)
// It is ok if "MI->getNumOperands()-3" is -3, -2, or -1. T.addOperand(MI->getOperand(NOp++));
for (oper = MI->getNumOperands() - 3; oper >= 0; --oper)
{
MachineOperand &MO = MI->getOperand(oper);
// Opnd[0] Opnd[1] Opnd[2] Opnd[3] Opnd[4] Opnd[5] Opnd[6] Opnd[7] MI->setDesc(get(PredOpc));
// <Def0> <Def1> <Use0> <Use1> <ImpDef0> <ImpDef1> <ImpUse0> <ImpUse1> while (unsigned n = MI->getNumOperands())
// /\~ MI->RemoveOperand(n-1);
// /||\~ for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i)
// || MI->addOperand(T->getOperand(i));
// Predicate Operand here
if (MO.isReg() && !MO.isUse() && !MO.isImplicit()) {
break;
}
if (MO.isReg()) {
MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
MO.isImplicit(), MO.isKill(),
MO.isDead(), MO.isUndef(),
MO.isDebug());
}
else if (MO.isImm()) {
MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
}
else if (MO.isGlobal()) {
// MI can not have more than one GlobalAddress operand.
assert(hasGAOpnd == false && "MI can only have one GlobalAddress opnd");
// There is no member function called "ChangeToGlobalAddress" in the MachineBasicBlock::instr_iterator TI = &*T;
// MachineOperand class (not like "ChangeToRegister" and B.erase(TI);
// "ChangeToImmediate"). So we have to remove them from Operands[] list
// first, and then add them back after we have inserted the predicate
// operand. tmpOpnds[] is to remember these operands before we remove
// them.
tmpOpnds.push_back(MO);
// Operands[oper] is a GlobalAddress operand; MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
// Operands[oper+1] has been copied into Operands[oper+2]; MRI.clearKillFlags(PredReg);
hasGAOpnd = true;
GAIdx = oper;
continue;
}
else {
llvm_unreachable("Unexpected operand type");
}
}
}
int regPos = invertJump ? 1 : 0;
MachineOperand PredMO = Cond[regPos];
// [oper] now points to the last explicit Def. Predicate operand must be
// located at [oper+1]. See diagram above.
// This assumes that the predicate is always the first operand,
// i.e. Operands[0+numResults], in the set of inputs
// It is better to have an assert here to check this. But I don't know how
// to write this assert because findFirstPredOperandIdx() would return -1
if (oper < -1) oper = -1;
MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
PredMO.isImplicit(), false,
PredMO.isDead(), PredMO.isUndef(),
PredMO.isDebug());
MachineRegisterInfo &RegInfo = MI->getParent()->getParent()->getRegInfo();
RegInfo.clearKillFlags(PredMO.getReg());
if (hasGAOpnd)
{
unsigned int i;
// Operands[GAIdx] is the original GlobalAddress operand, which is
// already copied into tmpOpnds[0].
// Operands[GAIdx] now stores a copy of Operands[GAIdx-1]
// Operands[GAIdx+1] has already been copied into Operands[GAIdx+2],
// so we start from [GAIdx+2]
for (i = GAIdx + 2; i < MI->getNumOperands(); ++i)
tmpOpnds.push_back(MI->getOperand(i));
// Remove all operands in range [ (GAIdx+1) ... (MI->getNumOperands()-1) ]
// It is very important that we always remove from the end of Operands[]
// MI->getNumOperands() is at least 2 if program goes to here.
for (i = MI->getNumOperands() - 1; i > GAIdx; --i)
MI->RemoveOperand(i);
for (i = 0; i < tmpOpnds.size(); ++i)
MI->addOperand(tmpOpnds[i]);
}
return true; return true;
} }
@ -1135,17 +1172,20 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
// //
// We indicate that we want to reverse the branch by // We indicate that we want to reverse the branch by
// inserting a 0 at the beginning of the Cond vector. // inserting the reversed branching opcode.
// //
bool HexagonInstrInfo:: bool HexagonInstrInfo::ReverseBranchCondition(
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { SmallVectorImpl<MachineOperand> &Cond) const {
if (!Cond.empty() && Cond[0].isMBB()) if (Cond.empty())
return true; return true;
if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) { assert(Cond[0].isImm() && "First entry in the cond vector not imm-val");
Cond.erase(Cond.begin()); Opcode_t opcode = Cond[0].getImm();
} else { //unsigned temp;
Cond.insert(Cond.begin(), MachineOperand::CreateImm(0)); assert(get(opcode).isBranch() && "Should be a branching condition.");
} if (isEndLoopN(opcode))
return true;
Opcode_t NewOpcode = getInvertedPredicatedOpcode(opcode);
Cond[0].setImm(NewOpcode);
return false; return false;
} }
@ -1583,13 +1623,12 @@ bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
return false; return false;
} }
bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const { bool HexagonInstrInfo::isNewValueJump(Opcode_t Opcode) const {
return (getAddrMode(MI) == HexagonII::PostInc); return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode);
} }
bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
const uint64_t F = MI->getDesc().TSFlags; return (getAddrMode(MI) == HexagonII::PostInc);
return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
} }
// Returns true, if any one of the operands is a dot new // Returns true, if any one of the operands is a dot new
@ -1944,8 +1983,36 @@ bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const {
(Opcode == Hexagon::J2_jumpf); (Opcode == Hexagon::J2_jumpf);
} }
bool HexagonInstrInfo::PredOpcodeHasNot(Opcode_t Opcode) const { bool HexagonInstrInfo::predOpcodeHasNot(
return (Opcode == Hexagon::J2_jumpf) || const SmallVectorImpl<MachineOperand> &Cond) const {
(Opcode == Hexagon::J2_jumpfnewpt) || if (Cond.empty() || !isPredicated(Cond[0].getImm()))
(Opcode == Hexagon::J2_jumpfnew); return false;
return !isPredicatedTrue(Cond[0].getImm());
} }
/// Return true if \p Opcode is one of the hardware-loop end opcodes
/// (Hexagon::ENDLOOP0 or Hexagon::ENDLOOP1).
bool HexagonInstrInfo::isEndLoopN(Opcode_t Opcode) const {
  switch (Opcode) {
  case Hexagon::ENDLOOP0:
  case Hexagon::ENDLOOP1:
    return true;
  default:
    return false;
  }
}
/// Extract the predicate register from a branch condition vector.
///
/// \p Cond is expected to hold the branch opcode as an immediate in Cond[0],
/// followed by the operands of that branch.
///
/// \param Cond          Condition vector to inspect.
/// \param PredReg       Set to the register held in Cond[1] on success.
/// \param PredRegPos    Set to the index of that register in \p Cond (1).
/// \param PredRegFlags  Set to the RegState flags (Implicit and/or Undef)
///                      carried by Cond[1].
/// \return true if a predicate register was found. Returns false, leaving the
///         output parameters untouched, when \p Cond is empty, when Cond[0]
///         is a new-value jump opcode, or when Cond[1] is a basic block.
bool HexagonInstrInfo::getPredReg(const SmallVectorImpl<MachineOperand> &Cond,
                                  unsigned &PredReg, unsigned &PredRegPos,
                                  unsigned &PredRegFlags) const {
  if (Cond.empty())
    return false;
  // New-value jumps are rejected before the size assertion: AnalyzeBranch
  // builds a three-entry vector for them, so asserting first would abort in
  // assert-enabled builds instead of reporting "no predicate register".
  if (isNewValueJump(Cond[0].getImm())) {
    DEBUG(dbgs() << "No predregs for new-value jumps/endloop");
    return false;
  }
  // Everything that is not a new-value jump must be an (opcode, operand) pair.
  assert(Cond.size() == 2);
  // A basic-block operand in Cond[1] means there is no predicate register.
  if (Cond[1].isMBB()) {
    DEBUG(dbgs() << "No predregs for new-value jumps/endloop");
    return false;
  }
  PredReg = Cond[1].getReg();
  PredRegPos = 1;
  // See IfConversion.cpp why we add RegState::Implicit | RegState::Undef
  PredRegFlags = 0;
  if (Cond[1].isImplicit())
    PredRegFlags = RegState::Implicit;
  if (Cond[1].isUndef())
    PredRegFlags |= RegState::Undef;
  return true;
}

View File

@ -32,9 +32,10 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
virtual void anchor(); virtual void anchor();
const HexagonRegisterInfo RI; const HexagonRegisterInfo RI;
const HexagonSubtarget &Subtarget; const HexagonSubtarget &Subtarget;
typedef unsigned Opcode_t;
public: public:
typedef unsigned Opcode_t;
explicit HexagonInstrInfo(HexagonSubtarget &ST); explicit HexagonInstrInfo(HexagonSubtarget &ST);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
@ -185,6 +186,7 @@ public:
bool isConditionalStore(const MachineInstr* MI) const; bool isConditionalStore(const MachineInstr* MI) const;
bool isNewValueInst(const MachineInstr* MI) const; bool isNewValueInst(const MachineInstr* MI) const;
bool isNewValue(const MachineInstr* MI) const; bool isNewValue(const MachineInstr* MI) const;
bool isNewValue(Opcode_t Opcode) const;
bool isDotNewInst(const MachineInstr* MI) const; bool isDotNewInst(const MachineInstr* MI) const;
int GetDotOldOp(const int opc) const; int GetDotOldOp(const int opc) const;
int GetDotNewOp(const MachineInstr* MI) const; int GetDotNewOp(const MachineInstr* MI) const;
@ -200,6 +202,7 @@ public:
bool isNewValueStore(const MachineInstr* MI) const; bool isNewValueStore(const MachineInstr* MI) const;
bool isNewValueStore(unsigned Opcode) const; bool isNewValueStore(unsigned Opcode) const;
bool isNewValueJump(const MachineInstr* MI) const; bool isNewValueJump(const MachineInstr* MI) const;
bool isNewValueJump(Opcode_t Opcode) const;
bool isNewValueJumpCandidate(const MachineInstr *MI) const; bool isNewValueJumpCandidate(const MachineInstr *MI) const;
@ -217,7 +220,11 @@ public:
bool NonExtEquivalentExists (const MachineInstr *MI) const; bool NonExtEquivalentExists (const MachineInstr *MI) const;
short getNonExtOpcode(const MachineInstr *MI) const; short getNonExtOpcode(const MachineInstr *MI) const;
bool PredOpcodeHasJMP_c(Opcode_t Opcode) const; bool PredOpcodeHasJMP_c(Opcode_t Opcode) const;
bool PredOpcodeHasNot(Opcode_t Opcode) const; bool predOpcodeHasNot(const SmallVectorImpl<MachineOperand> &Cond) const;
bool isEndLoopN(Opcode_t Opcode) const;
bool getPredReg(const SmallVectorImpl<MachineOperand> &Cond,
unsigned &PredReg, unsigned &PredRegPos,
unsigned &PredRegFlags) const;
int getCondOpcode(int Opc, bool sense) const; int getCondOpcode(int Opc, bool sense) const;
}; };

View File

@ -0,0 +1,599 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; 64-bit register-pair compares: the C2.cmpeqp / cmpgtp / cmpgtup intrinsics
; are each expected to become one cmp.eq / cmp.gt / cmp.gtu on register pairs
; with the result written to predicate p0.
; CHECK-LABEL: @test00
; CHECK: p0 = cmp.eq(r1:0, r3:2)
define i32 @test00(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.C2.cmpeqp(i64 %Rs, i64 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test01
; CHECK: p0 = cmp.gt(r1:0, r3:2)
define i32 @test01(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.C2.cmpgtp(i64 %Rs, i64 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test02
; CHECK: p0 = cmp.gtu(r1:0, r3:2)
define i32 @test02(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.C2.cmpgtup(i64 %Rs, i64 %Rt)
ret i32 %0
}
; Compare-to-register forms: the A4.rcmp* intrinsics are expected to leave the
; cmp.eq result (negated for the "neq" variants) in r0 instead of a predicate
; register. test12/test13 use the immediate-operand variants.
; CHECK-LABEL: @test10
; CHECK: r0 = cmp.eq(r0, r1)
define i32 @test10(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpeq(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test11
; CHECK: r0 = !cmp.eq(r0, r1)
define i32 @test11(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpneq(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test12
; CHECK: r0 = cmp.eq(r0, #23)
define i32 @test12(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpeqi(i32 %Rs, i32 23)
ret i32 %0
}
; CHECK-LABEL: @test13
; CHECK: r0 = !cmp.eq(r0, #47)
define i32 @test13(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpneqi(i32 %Rs, i32 47)
ret i32 %0
}
; Byte compares: the A4.cmpb* intrinsics are expected to become cmpb.eq /
; cmpb.gt / cmpb.gtu writing p0. test20-test22 compare two registers,
; test23-test25 compare a register against an immediate.
; CHECK-LABEL: @test20
; CHECK: p0 = cmpb.eq(r0, r1)
define i32 @test20(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbeq(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test21
; CHECK: p0 = cmpb.gt(r0, r1)
define i32 @test21(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgt(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test22
; CHECK: p0 = cmpb.gtu(r0, r1)
define i32 @test22(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgtu(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test23
; CHECK: p0 = cmpb.eq(r0, #56)
define i32 @test23(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbeqi(i32 %Rs, i32 56)
ret i32 %0
}
; CHECK-LABEL: @test24
; CHECK: p0 = cmpb.gt(r0, #29)
define i32 @test24(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgti(i32 %Rs, i32 29)
ret i32 %0
}
; CHECK-LABEL: @test25
; CHECK: p0 = cmpb.gtu(r0, #111)
define i32 @test25(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgtui(i32 %Rs, i32 111)
ret i32 %0
}
; Halfword compares: the A4.cmph* intrinsics are expected to become cmph.eq /
; cmph.gt / cmph.gtu writing p0. test30-test32 compare two registers,
; test33-test35 compare against an immediate (test33/test34 use negative
; immediates).
; CHECK-LABEL: @test30
; CHECK: p0 = cmph.eq(r0, r1)
define i32 @test30(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpheq(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test31
; CHECK: p0 = cmph.gt(r0, r1)
define i32 @test31(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgt(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test32
; CHECK: p0 = cmph.gtu(r0, r1)
define i32 @test32(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgtu(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test33
; CHECK: p0 = cmph.eq(r0, #-123)
define i32 @test33(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpheqi(i32 %Rs, i32 -123)
ret i32 %0
}
; CHECK-LABEL: @test34
; CHECK: p0 = cmph.gt(r0, #-3)
define i32 @test34(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgti(i32 %Rs, i32 -3)
ret i32 %0
}
; CHECK-LABEL: @test35
; CHECK: p0 = cmph.gtu(r0, #13)
define i32 @test35(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgtui(i32 %Rs, i32 13)
ret i32 %0
}
; Predicate-related vector operations: C2.vmux is expected to become a vmux
; controlled by p0, and A4.vcmpbeq.any an any8(vcmpb.eq(...)) writing p0.
; CHECK-LABEL: @test40
; CHECK: r1:0 = vmux(p0, r3:2, r5:4)
define i64 @test40(i32 %Pu, i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.C2.vmux(i32 %Pu, i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test41
; CHECK: p0 = any8(vcmpb.eq(r1:0, r3:2))
define i32 @test41(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.vcmpbeq.any(i64 %Rs, i64 %Rt)
ret i32 %0
}
; 64-bit ALU operations on register pairs: add, saturating add, sub, add of a
; 32-bit register to a pair (test53), and/or/xor, and the and/or forms whose
; second operand is complemented (test57/test58).
; CHECK-LABEL: @test50
; CHECK: r1:0 = add(r1:0, r3:2)
define i64 @test50(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.addp(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test51
; CHECK: r1:0 = add(r1:0, r3:2):sat
define i64 @test51(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.addpsat(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test52
; CHECK: r1:0 = sub(r1:0, r3:2)
define i64 @test52(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.subp(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test53
; CHECK: r1:0 = add(r0, r3:2)
define i64 @test53(i32 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.addsp(i32 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test54
; CHECK: r1:0 = and(r1:0, r3:2)
define i64 @test54(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.andp(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test55
; CHECK: r1:0 = or(r1:0, r3:2)
define i64 @test55(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.orp(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test56
; CHECK: r1:0 = xor(r1:0, r3:2)
define i64 @test56(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.xorp(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test57
; CHECK: r1:0 = and(r1:0, ~r3:2)
define i64 @test57(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A4.andnp(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test58
; CHECK: r1:0 = or(r1:0, ~r3:2)
define i64 @test58(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A4.ornp(i64 %Rs, i64 %Rt)
ret i64 %0
}
; Halfword adds (A2.addh.*). The "l16" intrinsics are expected to print with no
; shift suffix and the "h16" intrinsics with ":<<16"; the ".sat" variants add
; ":sat". The trailing ll/lh/hl/hh part of the intrinsic name selects the
; .l/.h halves shown in the expected assembly.
; CHECK-LABEL: @test60
; CHECK: r0 = add(r0.l, r1.l)
define i32 @test60(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test61
; CHECK: r0 = add(r0.l, r1.h)
define i32 @test61(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test62
; CHECK: r0 = add(r0.l, r1.l):sat
define i32 @test62(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test63
; CHECK: r0 = add(r0.l, r1.h):sat
define i32 @test63(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test64
; CHECK: r0 = add(r0.l, r1.l):<<16
define i32 @test64(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test65
; CHECK: r0 = add(r0.l, r1.h):<<16
define i32 @test65(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.lh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test66
; CHECK: r0 = add(r0.h, r1.l):<<16
define i32 @test66(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test67
; CHECK: r0 = add(r0.h, r1.h):<<16
define i32 @test67(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.hh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test68
; CHECK: r0 = add(r0.l, r1.l):sat:<<16
define i32 @test68(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test69
; CHECK: r0 = add(r0.l, r1.h):sat:<<16
define i32 @test69(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.lh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test6A
; CHECK: r0 = add(r0.h, r1.l):sat:<<16
define i32 @test6A(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test6B
; CHECK: r0 = add(r0.h, r1.h):sat:<<16
define i32 @test6B(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.hh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; Halfword subtracts (A2.subh.*), mirroring the halfword-add tests: "l16"
; intrinsics are expected to print with no shift suffix, "h16" intrinsics with
; ":<<16", and the ".sat" variants with ":sat".
; CHECK-LABEL: @test70
; CHECK: r0 = sub(r0.l, r1.l)
define i32 @test70(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test71
; CHECK: r0 = sub(r0.l, r1.h)
define i32 @test71(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test72
; CHECK: r0 = sub(r0.l, r1.l):sat
define i32 @test72(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.sat.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test73
; CHECK: r0 = sub(r0.l, r1.h):sat
define i32 @test73(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.sat.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test74
; CHECK: r0 = sub(r0.l, r1.l):<<16
define i32 @test74(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test75
; CHECK: r0 = sub(r0.l, r1.h):<<16
define i32 @test75(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.lh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test76
; CHECK: r0 = sub(r0.h, r1.l):<<16
define i32 @test76(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test77
; CHECK: r0 = sub(r0.h, r1.h):<<16
define i32 @test77(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.hh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test78
; CHECK: r0 = sub(r0.l, r1.l):sat:<<16
define i32 @test78(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test79
; CHECK: r0 = sub(r0.l, r1.h):sat:<<16
define i32 @test79(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.lh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test7A
; CHECK: r0 = sub(r0.h, r1.l):sat:<<16
define i32 @test7A(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test7B
; CHECK: r0 = sub(r0.h, r1.h):sat:<<16
define i32 @test7B(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.hh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; Immediate combined with a shifted register (S4.*.asl.ri / S4.*.lsr.ri). Each
; call passes (immediate 1, %Rs, shift amount 2) and is expected to become
; op(#1, asl(r0, #2)) or op(#1, lsr(r0, #2)) for op in and/or/add/sub.
; CHECK-LABEL: @test90
; CHECK: r0 = and(#1, asl(r0, #2))
define i32 @test90(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.andi.asl.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test91
; CHECK: r0 = or(#1, asl(r0, #2))
define i32 @test91(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.ori.asl.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test92
; CHECK: r0 = add(#1, asl(r0, #2))
define i32 @test92(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.addi.asl.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test93
; CHECK: r0 = sub(#1, asl(r0, #2))
define i32 @test93(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.subi.asl.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test94
; CHECK: r0 = and(#1, lsr(r0, #2))
define i32 @test94(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.andi.lsr.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test95
; CHECK: r0 = or(#1, lsr(r0, #2))
define i32 @test95(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.ori.lsr.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test96
; CHECK: r0 = add(#1, lsr(r0, #2))
define i32 @test96(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.addi.lsr.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test97
; CHECK: r0 = sub(#1, lsr(r0, #2))
define i32 @test97(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.subi.lsr.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; Miscellaneous two-operand operations: bitsplit (64-bit result), modwrap
; (from the A4.modwrapu intrinsic), and parity on register pairs (test102) and
; on 32-bit registers (test103).
; CHECK-LABEL: @test100
; CHECK: r1:0 = bitsplit(r0, r1)
define i64 @test100(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A4.bitsplit(i32 %Rs, i32 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test101
; CHECK: r0 = modwrap(r0, r1)
define i32 @test101(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test102
; CHECK: r0 = parity(r1:0, r3:2)
define i32 @test102(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S2.parityp(i64 %Rs, i64 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test103
; CHECK: r0 = parity(r0, r1)
define i32 @test103(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.parity(i32 %Rs, i32 %Rt)
ret i32 %0
}
declare i32 @llvm.hexagon.C2.cmpeqp(i64, i64) #1
declare i32 @llvm.hexagon.C2.cmpgtp(i64, i64) #1
declare i32 @llvm.hexagon.C2.cmpgtup(i64, i64) #1
declare i32 @llvm.hexagon.A4.rcmpeq(i32, i32) #1
declare i32 @llvm.hexagon.A4.rcmpneq(i32, i32) #1
declare i32 @llvm.hexagon.A4.rcmpeqi(i32, i32) #1
declare i32 @llvm.hexagon.A4.rcmpneqi(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpbeq(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpbgt(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpbgtu(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpbeqi(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpbgti(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpbgtui(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpheq(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmphgt(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmphgtu(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpheqi(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmphgti(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmphgtui(i32, i32) #1
declare i64 @llvm.hexagon.C2.vmux(i32, i64, i64) #1
declare i32 @llvm.hexagon.A4.vcmpbeq.any(i64, i64) #1
declare i64 @llvm.hexagon.A2.addp(i64, i64) #1
declare i64 @llvm.hexagon.A2.addpsat(i64, i64) #1
declare i64 @llvm.hexagon.A2.subp(i64, i64) #1
declare i64 @llvm.hexagon.A2.addsp(i32, i64) #1
declare i64 @llvm.hexagon.A2.andp(i64, i64) #1
declare i64 @llvm.hexagon.A2.orp(i64, i64) #1
declare i64 @llvm.hexagon.A2.xorp(i64, i64) #1
declare i64 @llvm.hexagon.A4.ornp(i64, i64) #1
declare i64 @llvm.hexagon.A4.andnp(i64, i64) #1
declare i32 @llvm.hexagon.A2.addh.l16.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.l16.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.l16.sat.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.lh(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.hh(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.sat.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.sat.lh(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.sat.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.sat.hh(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.l16.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.l16.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.l16.sat.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.l16.sat.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.lh(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.hh(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.sat.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.sat.lh(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.sat.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.sat.hh(i32, i32) #1
declare i64 @llvm.hexagon.A4.bitsplit(i32, i32) #1
declare i32 @llvm.hexagon.A4.modwrapu(i32, i32) #1
declare i32 @llvm.hexagon.S2.parityp(i64, i64) #1
declare i32 @llvm.hexagon.S4.parity(i32, i32) #1
declare i32 @llvm.hexagon.S4.andi.asl.ri(i32, i32, i32) #1
declare i32 @llvm.hexagon.S4.ori.asl.ri(i32, i32, i32) #1
declare i32 @llvm.hexagon.S4.addi.asl.ri(i32, i32, i32) #1
declare i32 @llvm.hexagon.S4.subi.asl.ri(i32, i32, i32) #1
declare i32 @llvm.hexagon.S4.andi.lsr.ri(i32, i32, i32) #1
declare i32 @llvm.hexagon.S4.ori.lsr.ri(i32, i32, i32) #1
declare i32 @llvm.hexagon.S4.addi.lsr.ri(i32, i32, i32) #1
declare i32 @llvm.hexagon.S4.subi.lsr.ri(i32, i32, i32) #1
attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }

View File

@ -0,0 +1,56 @@
; RUN: llc -march=hexagon -O2 < %s | FileCheck %s
; Two counted loops on mutually exclusive paths. The directives below expect
; exactly two endloop0 instructions in the output, i.e. one per loop.
define void @foo(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind optsize {
entry:
; n > 100 selects the first loop; otherwise the second loop is considered.
%cmp = icmp sgt i32 %n, 100
br i1 %cmp, label %for.body.preheader, label %for.cond4.preheader
; CHECK: endloop0
; CHECK: endloop0
; CHECK-NOT: endloop0
for.body.preheader:
br label %for.body
for.cond4.preheader:
; The second loop only runs when n > 0.
%cmp113 = icmp sgt i32 %n, 0
br i1 %cmp113, label %for.body7.preheader, label %if.end
for.body7.preheader:
br label %for.body7
; First loop: A[i] = B[i] - 1, exiting once the incremented counter equals n.
for.body:
%arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %B, %for.body.preheader ]
%arrayidx3.phi = phi i32* [ %arrayidx3.inc, %for.body ], [ %A, %for.body.preheader ]
%i.014 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%0 = load i32, i32* %arrayidx.phi, align 4
%sub = add nsw i32 %0, -1
store i32 %sub, i32* %arrayidx3.phi, align 4
%inc = add nsw i32 %i.014, 1
%exitcond = icmp eq i32 %inc, %n
%arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
%arrayidx3.inc = getelementptr i32, i32* %arrayidx3.phi, i32 1
br i1 %exitcond, label %if.end.loopexit, label %for.body
; Second loop: A[i] = B[i] + 1, exiting once the incremented counter equals n.
for.body7:
%arrayidx8.phi = phi i32* [ %arrayidx8.inc, %for.body7 ], [ %B, %for.body7.preheader ]
%arrayidx9.phi = phi i32* [ %arrayidx9.inc, %for.body7 ], [ %A, %for.body7.preheader ]
%i.117 = phi i32 [ %inc11, %for.body7 ], [ 0, %for.body7.preheader ]
%1 = load i32, i32* %arrayidx8.phi, align 4
%add = add nsw i32 %1, 1
store i32 %add, i32* %arrayidx9.phi, align 4
%inc11 = add nsw i32 %i.117, 1
%exitcond18 = icmp eq i32 %inc11, %n
%arrayidx8.inc = getelementptr i32, i32* %arrayidx8.phi, i32 1
%arrayidx9.inc = getelementptr i32, i32* %arrayidx9.phi, i32 1
br i1 %exitcond18, label %if.end.loopexit21, label %for.body7
if.end.loopexit:
br label %if.end
if.end.loopexit21:
br label %if.end
if.end:
ret void
}