[Hexagon] Update AnalyzeBranch and related target hooks

Improved the AnalyzeBranch, InsertBranch, and RemoveBranch target hooks to
handle more of the Hexagon branch instructions. This also required changes
to analyzeCompare and PredicateInstruction. Specifically, we added support
for new-value compare jumps, improved the handling of ENDLOOP instructions,
taught analyzeCompare about more compare instructions, and improved support
for predicated instructions. The branch condition (Cond) vector now records
the branch opcode as its first entry.
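
For illustration, a minimal sketch of the new Cond encoding (not code from
this patch; the includes and the helper/PredReg names are assumed):

  // Mirrors what AnalyzeBranch now pushes for a predicated jump:
  // "if (!Pv) jump <bb>" is recorded as { J2_jumpf, Pv }.
  // Assumes the usual LLVM headers plus the Hexagon opcode enum from the
  // target's generated headers.
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/MachineOperand.h"
  using namespace llvm;

  static void encodeInvertedJump(SmallVectorImpl<MachineOperand> &Cond,
                                 unsigned PredReg) {
    Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf));
    Cond.push_back(MachineOperand::CreateReg(PredReg, /*isDef=*/false));
  }
  // An ENDLOOP terminator is encoded as { ENDLOOP0/1, <loop header MBB> },
  // and a new-value jump as { <nvj opcode>, Rs, Rt or #u5 }.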

Differential Revision: http://reviews.llvm.org/D9559


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236876 91177308-0d34-0410-b5e6-96231b3b80d8
Brendon Cahoon 2015-05-08 16:16:29 +00:00
parent d7e20e7be8
commit 7fd56b1e4a
5 changed files with 1003 additions and 272 deletions


@@ -369,10 +369,10 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
if (NotAnalyzed)
return false;
unsigned CSz = Cond.size();
assert (CSz == 1 || CSz == 2);
unsigned PredR = Cond[CSz-1].getReg();
unsigned PredR, PredPos, PredRegFlags;
if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
return false;
MachineInstr *PredI = MRI->getVRegDef(PredR);
if (!PredI->isCompare())
@@ -491,8 +491,10 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
// to put imm(0), followed by P in the vector Cond.
// If TB is not the header, it means that the "not-taken" path must lead
// to the header.
bool Negated = (Cond.size() > 1) ^ (TB != Header);
unsigned PredReg = Cond[Cond.size()-1].getReg();
bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
unsigned PredReg, PredPos, PredRegFlags;
if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
return nullptr;
MachineInstr *CondI = MRI->getVRegDef(PredReg);
unsigned CondOpc = CondI->getOpcode();


@@ -115,72 +115,172 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
// Find the hardware loop instruction used to set-up the specified loop.
// On Hexagon, we have two instructions used to set-up the hardware loop
// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
// to indicate the end of a loop.
static MachineInstr *
findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
int LOOPi;
int LOOPr;
if (EndLoopOp == Hexagon::ENDLOOP0) {
LOOPi = Hexagon::J2_loop0i;
LOOPr = Hexagon::J2_loop0r;
} else { // EndLoopOp == Hexagon::ENDLOOP1
LOOPi = Hexagon::J2_loop1i;
LOOPr = Hexagon::J2_loop1r;
}
unsigned
HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const{
int BOpc = Hexagon::J2_jump;
int BccOpc = Hexagon::J2_jumpt;
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
int regPos = 0;
// Check if ReverseBranchCondition has asked to reverse this branch
// If we want to reverse the branch an odd number of times, we want
// JMP_f.
if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
BccOpc = Hexagon::J2_jumpf;
regPos = 1;
// The loop set-up instruction will be in a predecessor block
for (MachineBasicBlock::pred_iterator PB = BB->pred_begin(),
PE = BB->pred_end(); PB != PE; ++PB) {
// If this block has already been visited, skip it.
if (!Visited.insert(*PB).second)
continue;
if (*PB == BB)
continue;
for (MachineBasicBlock::reverse_instr_iterator I = (*PB)->instr_rbegin(),
E = (*PB)->instr_rend(); I != E; ++I) {
int Opc = I->getOpcode();
if (Opc == LOOPi || Opc == LOOPr)
return &*I;
// We've reached a different loop, which means the loop0 has been removed.
if (Opc == EndLoopOp)
return 0;
}
// Check the predecessors for the LOOP instruction.
MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited);
if (loop)
return loop;
}
return 0;
}
if (!FBB) {
if (Cond.empty()) {
// Due to a bug in TailMerging/CFG Optimization, we need to add a
// special case handling of a predicated jump followed by an
// unconditional jump. If not, Tail Merging and CFG Optimization go
// into an infinite loop.
MachineBasicBlock *NewTBB, *NewFBB;
SmallVector<MachineOperand, 4> Cond;
MachineInstr *Term = MBB.getFirstTerminator();
if (isPredicated(Term) && !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond,
false)) {
MachineBasicBlock *NextBB =
std::next(MachineFunction::iterator(&MBB));
if (NewTBB == NextBB) {
ReverseBranchCondition(Cond);
RemoveBranch(MBB);
return InsertBranch(MBB, TBB, nullptr, Cond, DL);
}
unsigned HexagonInstrInfo::InsertBranch(
MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
Opcode_t BOpc = Hexagon::J2_jump;
Opcode_t BccOpc = Hexagon::J2_jumpt;
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
// Check if ReverseBranchCondition has asked to reverse this branch
// If we want to reverse the branch an odd number of times, we want
// J2_jumpf.
if (!Cond.empty() && Cond[0].isImm())
BccOpc = Cond[0].getImm();
if (!FBB) {
if (Cond.empty()) {
// Due to a bug in TailMerging/CFG Optimization, we need to add a
// special case handling of a predicated jump followed by an
// unconditional jump. If not, Tail Merging and CFG Optimization go
// into an infinite loop.
MachineBasicBlock *NewTBB, *NewFBB;
SmallVector<MachineOperand, 4> Cond;
MachineInstr *Term = MBB.getFirstTerminator();
if (Term != MBB.end() && isPredicated(Term) &&
!AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
MachineBasicBlock *NextBB =
std::next(MachineFunction::iterator(&MBB));
if (NewTBB == NextBB) {
ReverseBranchCondition(Cond);
RemoveBranch(MBB);
return InsertBranch(MBB, TBB, nullptr, Cond, DL);
}
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
} else {
// If Cond[0] is a basic block, insert ENDLOOP0.
if (Cond[0].isMBB())
BuildMI(&MBB, DL, get(Hexagon::ENDLOOP0)).addMBB(Cond[0].getMBB());
else
BuildMI(&MBB, DL,
get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
}
return 1;
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
} else if (isEndLoopN(Cond[0].getImm())) {
int EndLoopOp = Cond[0].getImm();
assert(Cond[1].isMBB());
// Since we're adding an ENDLOOP, there better be a LOOP instruction.
// Check for it, and change the BB target if needed.
SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
Loop->getOperand(0).setMBB(TBB);
// Add the ENDLOOP after finding the LOOP instruction.
BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
} else if (isNewValueJump(Cond[0].getImm())) {
assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump");
// New value jump
// (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
// (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber(););
if (Cond[2].isReg()) {
unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
addReg(Cond[2].getReg(), Flags2).addMBB(TBB);
} else if(Cond[2].isImm()) {
BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
addImm(Cond[2].getImm()).addMBB(TBB);
} else
llvm_unreachable("Invalid condition for branching");
} else {
assert((Cond.size() == 2) && "Malformed cond vector");
const MachineOperand &RO = Cond[1];
unsigned Flags = getUndefRegState(RO.isUndef());
BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
}
return 1;
}
assert((!Cond.empty()) &&
"Cond. cannot be empty when multiple branchings are required");
assert((!isNewValueJump(Cond[0].getImm())) &&
"NV-jump cannot be inserted with another branch");
// Special case for hardware loops. The condition is a basic block.
if (isEndLoopN(Cond[0].getImm())) {
int EndLoopOp = Cond[0].getImm();
assert(Cond[1].isMBB());
// Since we're adding an ENDLOOP, there better be a LOOP instruction.
// Check for it, and change the BB target if needed.
SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
Loop->getOperand(0).setMBB(TBB);
// Add the ENDLOOP after finding the LOOP instruction.
BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
} else {
const MachineOperand &RO = Cond[1];
unsigned Flags = getUndefRegState(RO.isUndef());
BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
}
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
// We don't handle ENDLOOP0 with a conditional branch in AnalyzeBranch.
BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
return 2;
return 2;
}
/// This function can analyze one- and two-way branching only and should
/// (mostly) be called from the target-independent side.
/// The first entry in Cond is always the opcode of the branching instruction,
/// except when the Cond vector is supposed to be empty, e.g., when
/// AnalyzeBranch fails or the BB ends in just an unconditional jump.
/// Subsequent entries depend upon the opcode,
/// e.g. Jump_c p will have
/// Cond[0] = Jump_c
/// Cond[1] = p
/// HW-loop ENDLOOP:
/// Cond[0] = ENDLOOP
/// Cond[1] = MBB
/// New value jump:
/// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode
/// Cond[1] = R
/// Cond[2] = Imm
/// @note A related function is \fn findInstrPredicate, which fills in the
/// Cond vector when a predicated instruction is passed to it.
/// We follow the same protocol in that case too.
///
bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
TBB = nullptr;
FBB = nullptr;
Cond.clear();
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::instr_iterator I = MBB.instr_end();
@@ -202,6 +302,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
do {
--I;
if (I->isEHLabel())
// Don't analyze EH branches.
return true;
} while (I != MBB.instr_begin());
@@ -216,7 +317,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump &&
I->getOperand(0).isMBB();
// Delete the JMP if it's equivalent to a fall-through.
// Delete the J2_jump if it's equivalent to a fall-through.
if (AllowModify && JumpToBlock &&
MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
DEBUG(dbgs()<< "\nErasing the jump to successor block\n";);
@@ -257,7 +358,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return true;
bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode);
bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode);
bool LastOpcodeHasNVJump = isNewValueJump(LastInst);
// If there is only one terminator instruction, process it.
if (LastInst && !SecondLastInst) {
@@ -265,34 +366,54 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
TBB = LastInst->getOperand(0).getMBB();
return false;
}
if (LastOpcode == Hexagon::ENDLOOP0) {
if (isEndLoopN(LastOpcode)) {
TBB = LastInst->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0));
return false;
}
if (LastOpcodeHasJMP_c) {
TBB = LastInst->getOperand(1).getMBB();
if (LastOpcodeHasNot) {
Cond.push_back(MachineOperand::CreateImm(0));
}
Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0));
return false;
}
// Only supporting rr/ri versions of new-value jumps.
if (LastOpcodeHasNVJump && (LastInst->getNumExplicitOperands() == 3)) {
TBB = LastInst->getOperand(2).getMBB();
Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0));
Cond.push_back(LastInst->getOperand(1));
return false;
}
DEBUG(dbgs() << "\nCan't analyze BB#" << MBB.getNumber()
<< " with one jump\n";);
// Otherwise, don't know what this is.
return true;
}
bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode);
bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode);
bool SecLastOpcodeHasNVJump = isNewValueJump(SecondLastInst);
if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) {
TBB = SecondLastInst->getOperand(1).getMBB();
if (SecLastOpcodeHasNot)
Cond.push_back(MachineOperand::CreateImm(0));
Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
Cond.push_back(SecondLastInst->getOperand(0));
FBB = LastInst->getOperand(0).getMBB();
return false;
}
// Only supporting rr/ri versions of new-value jumps.
if (SecLastOpcodeHasNVJump &&
(SecondLastInst->getNumExplicitOperands() == 3) &&
(LastOpcode == Hexagon::J2_jump)) {
TBB = SecondLastInst->getOperand(2).getMBB();
Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
Cond.push_back(SecondLastInst->getOperand(0));
Cond.push_back(SecondLastInst->getOperand(1));
FBB = LastInst->getOperand(0).getMBB();
return false;
}
// If the block ends with two Hexagon::J2_jump instructions, handle it. The
// second one is not executed, so remove it.
if (SecLastOpcode == Hexagon::J2_jump && LastOpcode == Hexagon::J2_jump) {
@@ -303,53 +424,40 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
}
// If the block ends with an ENDLOOP, and JMP, handle it.
if (SecLastOpcode == Hexagon::ENDLOOP0 &&
LastOpcode == Hexagon::J2_jump) {
// If the block ends with an ENDLOOP, and J2_jump, handle it.
if (isEndLoopN(SecLastOpcode) && LastOpcode == Hexagon::J2_jump) {
TBB = SecondLastInst->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
Cond.push_back(SecondLastInst->getOperand(0));
FBB = LastInst->getOperand(0).getMBB();
return false;
}
DEBUG(dbgs() << "\nCan't analyze BB#" << MBB.getNumber()
<< " with two jumps";);
// Otherwise, can't handle this.
return true;
}
unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber());
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
unsigned Opc1 = I->getOpcode();
switch (Opc1) {
case Hexagon::J2_jump:
case Hexagon::J2_jumpt:
case Hexagon::J2_jumpf:
case Hexagon::ENDLOOP0:
I->eraseFromParent();
break;
default:
return 0;
}
I = MBB.end();
if (I == MBB.begin()) return 1;
--I;
unsigned Opc2 = I->getOpcode();
switch (Opc2) {
case Hexagon::J2_jumpt:
case Hexagon::J2_jumpf:
case Hexagon::ENDLOOP0:
I->eraseFromParent();
return 2;
default:
return 1;
unsigned Count = 0;
while (I != MBB.begin()) {
--I;
if (I->isDebugValue())
continue;
// Only removing branches from end of MBB.
if (!I->isBranch())
return Count;
if (Count && (I->getOpcode() == Hexagon::J2_jump))
llvm_unreachable("Malformed basic block: unconditional branch not last");
MBB.erase(&MBB.back());
I = MBB.end();
++Count;
}
return Count;
}
/// \brief For a comparison instruction, return the source registers in
/// \p SrcReg and \p SrcReg2 if it has two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
@@ -361,31 +469,39 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
// Set mask and the first source register.
switch (Opc) {
case Hexagon::C2_cmpeqp:
case Hexagon::C2_cmpeqi:
case Hexagon::C2_cmpeq:
case Hexagon::C2_cmpgtp:
case Hexagon::C2_cmpgtup:
case Hexagon::C2_cmpgtui:
case Hexagon::C2_cmpgtu:
case Hexagon::C2_cmpgti:
case Hexagon::C2_cmpeqp:
case Hexagon::C2_cmpgt:
case Hexagon::C2_cmpgtp:
case Hexagon::C2_cmpgtu:
case Hexagon::C2_cmpgtup:
case Hexagon::C4_cmpneq:
case Hexagon::C4_cmplte:
case Hexagon::C4_cmplteu:
case Hexagon::C2_cmpeqi:
case Hexagon::C2_cmpgti:
case Hexagon::C2_cmpgtui:
case Hexagon::C4_cmpneqi:
case Hexagon::C4_cmplteui:
case Hexagon::C4_cmpltei:
SrcReg = MI->getOperand(1).getReg();
Mask = ~0;
break;
case Hexagon::A4_cmpbeqi:
case Hexagon::A4_cmpbeq:
case Hexagon::A4_cmpbgtui:
case Hexagon::A4_cmpbgtu:
case Hexagon::A4_cmpbgt:
case Hexagon::A4_cmpbgtu:
case Hexagon::A4_cmpbeqi:
case Hexagon::A4_cmpbgti:
case Hexagon::A4_cmpbgtui:
SrcReg = MI->getOperand(1).getReg();
Mask = 0xFF;
break;
case Hexagon::A4_cmpheqi:
case Hexagon::A4_cmpheq:
case Hexagon::A4_cmphgtui:
case Hexagon::A4_cmphgtu:
case Hexagon::A4_cmphgt:
case Hexagon::A4_cmphgtu:
case Hexagon::A4_cmpheqi:
case Hexagon::A4_cmphgti:
case Hexagon::A4_cmphgtui:
SrcReg = MI->getOperand(1).getReg();
Mask = 0xFFFF;
break;
@@ -393,27 +509,35 @@
// Set the value/second source register.
switch (Opc) {
case Hexagon::C2_cmpeqp:
case Hexagon::C2_cmpeq:
case Hexagon::C2_cmpgtp:
case Hexagon::C2_cmpgtup:
case Hexagon::C2_cmpgtu:
case Hexagon::C2_cmpeqp:
case Hexagon::C2_cmpgt:
case Hexagon::C2_cmpgtp:
case Hexagon::C2_cmpgtu:
case Hexagon::C2_cmpgtup:
case Hexagon::A4_cmpbeq:
case Hexagon::A4_cmpbgtu:
case Hexagon::A4_cmpbgt:
case Hexagon::A4_cmpbgtu:
case Hexagon::A4_cmpheq:
case Hexagon::A4_cmphgtu:
case Hexagon::A4_cmphgt:
case Hexagon::A4_cmphgtu:
case Hexagon::C4_cmpneq:
case Hexagon::C4_cmplte:
case Hexagon::C4_cmplteu:
SrcReg2 = MI->getOperand(2).getReg();
return true;
case Hexagon::C2_cmpeqi:
case Hexagon::C2_cmpgtui:
case Hexagon::C2_cmpgti:
case Hexagon::C4_cmpneqi:
case Hexagon::C4_cmplteui:
case Hexagon::C4_cmpltei:
case Hexagon::A4_cmpbeqi:
case Hexagon::A4_cmpbgti:
case Hexagon::A4_cmpbgtui:
case Hexagon::A4_cmpheqi:
case Hexagon::A4_cmphgti:
case Hexagon::A4_cmphgtui:
SrcReg2 = 0;
Value = MI->getOperand(2).getImm();
@@ -731,6 +855,16 @@ bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
return false;
}
bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
const uint64_t F = MI->getDesc().TSFlags;
return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
}
bool HexagonInstrInfo::isNewValue(Opcode_t Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
}
bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4;
}
@@ -881,148 +1015,51 @@ int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
bool HexagonInstrInfo::
PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Cond) const {
if (Cond.empty() || isEndLoopN(Cond[0].getImm())) {
DEBUG(dbgs() << "\nCannot predicate:"; MI->dump(););
return false;
}
int Opc = MI->getOpcode();
assert (isPredicable(MI) && "Expected predicable instruction");
bool invertJump = (!Cond.empty() && Cond[0].isImm() &&
(Cond[0].getImm() == 0));
bool invertJump = predOpcodeHasNot(Cond);
// This will change MI's opcode to its predicate version.
// However, its operand list is still the old one, i.e. the
// non-predicate one.
MI->setDesc(get(getCondOpcode(Opc, invertJump)));
// We have to predicate MI "in place", i.e. after this function returns,
// MI will need to be transformed into a predicated form. To avoid com-
// plicated manipulations with the operands (handling tied operands,
// etc.), build a new temporary instruction, then overwrite MI with it.
int oper = -1;
unsigned int GAIdx = 0;
// Indicates whether the current MI has a GlobalAddress operand
bool hasGAOpnd = false;
std::vector<MachineOperand> tmpOpnds;
// Indicates whether we need to shift operands to right.
bool needShift = true;
// The predicate is ALWAYS the FIRST input operand !!!
if (MI->getNumOperands() == 0) {
// The non-predicate version of MI does not take any operands,
// i.e. no outs and no ins. In this condition, the predicate
// operand will be directly placed at Operands[0]. No operand
// shift is needed.
// Example: BARRIER
needShift = false;
oper = -1;
}
else if ( MI->getOperand(MI->getNumOperands()-1).isReg()
&& MI->getOperand(MI->getNumOperands()-1).isDef()
&& !MI->getOperand(MI->getNumOperands()-1).isImplicit()) {
// The non-predicate version of MI does not have any input operands.
// In this condition, we extend the length of Operands[] by one and
// copy the original last operand to the newly allocated slot.
// At this moment, it is just a place holder. Later, we will put
// predicate operand directly into it. No operand shift is needed.
// Example: r0=BARRIER (this is a faked insn used here for illustration)
MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
needShift = false;
oper = MI->getNumOperands() - 2;
}
else {
// We need to right shift all input operands by one. Duplicate the
// last operand into the newly allocated slot.
MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
MachineBasicBlock &B = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
unsigned PredOpc = getCondOpcode(Opc, invertJump);
MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc));
unsigned NOp = 0, NumOps = MI->getNumOperands();
while (NOp < NumOps) {
MachineOperand &Op = MI->getOperand(NOp);
if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
break;
T.addOperand(Op);
NOp++;
}
if (needShift)
{
// Operands[ MI->getNumOperands() - 2 ] has been copied into
// Operands[ MI->getNumOperands() - 1 ], so we start from
// Operands[ MI->getNumOperands() - 3 ].
// oper is a signed int.
// It is ok if "MI->getNumOperands()-3" is -3, -2, or -1.
for (oper = MI->getNumOperands() - 3; oper >= 0; --oper)
{
MachineOperand &MO = MI->getOperand(oper);
unsigned PredReg, PredRegPos, PredRegFlags;
bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags);
(void)GotPredReg;
assert(GotPredReg);
T.addReg(PredReg, PredRegFlags);
while (NOp < NumOps)
T.addOperand(MI->getOperand(NOp++));
// Opnd[0] Opnd[1] Opnd[2] Opnd[3] Opnd[4] Opnd[5] Opnd[6] Opnd[7]
// <Def0> <Def1> <Use0> <Use1> <ImpDef0> <ImpDef1> <ImpUse0> <ImpUse1>
//                /\
//               /||\
//                ||
//                Predicate Operand here
if (MO.isReg() && !MO.isUse() && !MO.isImplicit()) {
break;
}
if (MO.isReg()) {
MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
MO.isImplicit(), MO.isKill(),
MO.isDead(), MO.isUndef(),
MO.isDebug());
}
else if (MO.isImm()) {
MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
}
else if (MO.isGlobal()) {
// MI can not have more than one GlobalAddress operand.
assert(hasGAOpnd == false && "MI can only have one GlobalAddress opnd");
MI->setDesc(get(PredOpc));
while (unsigned n = MI->getNumOperands())
MI->RemoveOperand(n-1);
for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i)
MI->addOperand(T->getOperand(i));
// There is no member function called "ChangeToGlobalAddress" in the
// MachineOperand class (not like "ChangeToRegister" and
// "ChangeToImmediate"). So we have to remove them from Operands[] list
// first, and then add them back after we have inserted the predicate
// operand. tmpOpnds[] is to remember these operands before we remove
// them.
tmpOpnds.push_back(MO);
MachineBasicBlock::instr_iterator TI = &*T;
B.erase(TI);
// Operands[oper] is a GlobalAddress operand;
// Operands[oper+1] has been copied into Operands[oper+2];
hasGAOpnd = true;
GAIdx = oper;
continue;
}
else {
llvm_unreachable("Unexpected operand type");
}
}
}
int regPos = invertJump ? 1 : 0;
MachineOperand PredMO = Cond[regPos];
// [oper] now points to the last explicit Def. Predicate operand must be
// located at [oper+1]. See diagram above.
// This assumes that the predicate is always the first operand,
// i.e. Operands[0+numResults], in the set of inputs
// It is better to have an assert here to check this. But I don't know how
// to write this assert because findFirstPredOperandIdx() would return -1
if (oper < -1) oper = -1;
MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
PredMO.isImplicit(), false,
PredMO.isDead(), PredMO.isUndef(),
PredMO.isDebug());
MachineRegisterInfo &RegInfo = MI->getParent()->getParent()->getRegInfo();
RegInfo.clearKillFlags(PredMO.getReg());
if (hasGAOpnd)
{
unsigned int i;
// Operands[GAIdx] is the original GlobalAddress operand, which is
// already copied into tmpOpnds[0].
// Operands[GAIdx] now stores a copy of Operands[GAIdx-1]
// Operands[GAIdx+1] has already been copied into Operands[GAIdx+2],
// so we start from [GAIdx+2]
for (i = GAIdx + 2; i < MI->getNumOperands(); ++i)
tmpOpnds.push_back(MI->getOperand(i));
// Remove all operands in range [ (GAIdx+1) ... (MI->getNumOperands()-1) ]
// It is very important that we always remove from the end of Operands[]
// MI->getNumOperands() is at least 2 if program goes to here.
for (i = MI->getNumOperands() - 1; i > GAIdx; --i)
MI->RemoveOperand(i);
for (i = 0; i < tmpOpnds.size(); ++i)
MI->addOperand(tmpOpnds[i]);
}
MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
MRI.clearKillFlags(PredReg);
return true;
}
@@ -1135,17 +1172,20 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
//
// We indicate that we want to reverse the branch by
// inserting a 0 at the beginning of the Cond vector.
// inserting the reversed branching opcode.
//
bool HexagonInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
if (!Cond.empty() && Cond[0].isMBB())
bool HexagonInstrInfo::ReverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
if (Cond.empty())
return true;
if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
Cond.erase(Cond.begin());
} else {
Cond.insert(Cond.begin(), MachineOperand::CreateImm(0));
}
assert(Cond[0].isImm() && "First entry in the cond vector not imm-val");
Opcode_t opcode = Cond[0].getImm();
//unsigned temp;
assert(get(opcode).isBranch() && "Should be a branching condition.");
if (isEndLoopN(opcode))
return true;
Opcode_t NewOpcode = getInvertedPredicatedOpcode(opcode);
Cond[0].setImm(NewOpcode);
return false;
}
@@ -1583,13 +1623,12 @@ bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
return false;
}
bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
return (getAddrMode(MI) == HexagonII::PostInc);
bool HexagonInstrInfo::isNewValueJump(Opcode_t Opcode) const {
return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode);
}
bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
const uint64_t F = MI->getDesc().TSFlags;
return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
return (getAddrMode(MI) == HexagonII::PostInc);
}
// Returns true, if any one of the operands is a dot new
@@ -1944,8 +1983,36 @@ bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const {
(Opcode == Hexagon::J2_jumpf);
}
bool HexagonInstrInfo::PredOpcodeHasNot(Opcode_t Opcode) const {
return (Opcode == Hexagon::J2_jumpf) ||
(Opcode == Hexagon::J2_jumpfnewpt) ||
(Opcode == Hexagon::J2_jumpfnew);
bool HexagonInstrInfo::predOpcodeHasNot(
const SmallVectorImpl<MachineOperand> &Cond) const {
if (Cond.empty() || !isPredicated(Cond[0].getImm()))
return false;
return !isPredicatedTrue(Cond[0].getImm());
}
bool HexagonInstrInfo::isEndLoopN(Opcode_t Opcode) const {
return (Opcode == Hexagon::ENDLOOP0 ||
Opcode == Hexagon::ENDLOOP1);
}
bool HexagonInstrInfo::getPredReg(const SmallVectorImpl<MachineOperand> &Cond,
unsigned &PredReg, unsigned &PredRegPos,
unsigned &PredRegFlags) const {
if (Cond.empty())
return false;
assert(Cond.size() == 2);
if (isNewValueJump(Cond[0].getImm()) || Cond[1].isMBB()) {
DEBUG(dbgs() << "No predregs for new-value jumps/endloop");
return false;
}
PredReg = Cond[1].getReg();
PredRegPos = 1;
// See IfConversion.cpp for why we add RegState::Implicit | RegState::Undef.
PredRegFlags = 0;
if (Cond[1].isImplicit())
PredRegFlags = RegState::Implicit;
if (Cond[1].isUndef())
PredRegFlags |= RegState::Undef;
return true;
}


@@ -32,9 +32,10 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
virtual void anchor();
const HexagonRegisterInfo RI;
const HexagonSubtarget &Subtarget;
typedef unsigned Opcode_t;
public:
typedef unsigned Opcode_t;
explicit HexagonInstrInfo(HexagonSubtarget &ST);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
@@ -185,6 +186,7 @@ public:
bool isConditionalStore(const MachineInstr* MI) const;
bool isNewValueInst(const MachineInstr* MI) const;
bool isNewValue(const MachineInstr* MI) const;
bool isNewValue(Opcode_t Opcode) const;
bool isDotNewInst(const MachineInstr* MI) const;
int GetDotOldOp(const int opc) const;
int GetDotNewOp(const MachineInstr* MI) const;
@@ -200,6 +202,7 @@ public:
bool isNewValueStore(const MachineInstr* MI) const;
bool isNewValueStore(unsigned Opcode) const;
bool isNewValueJump(const MachineInstr* MI) const;
bool isNewValueJump(Opcode_t Opcode) const;
bool isNewValueJumpCandidate(const MachineInstr *MI) const;
@@ -217,7 +220,11 @@ public:
bool NonExtEquivalentExists (const MachineInstr *MI) const;
short getNonExtOpcode(const MachineInstr *MI) const;
bool PredOpcodeHasJMP_c(Opcode_t Opcode) const;
bool PredOpcodeHasNot(Opcode_t Opcode) const;
bool predOpcodeHasNot(const SmallVectorImpl<MachineOperand> &Cond) const;
bool isEndLoopN(Opcode_t Opcode) const;
bool getPredReg(const SmallVectorImpl<MachineOperand> &Cond,
unsigned &PredReg, unsigned &PredRegPos,
unsigned &PredRegFlags) const;
int getCondOpcode(int Opc, bool sense) const;
};


@@ -0,0 +1,599 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK-LABEL: @test00
; CHECK: p0 = cmp.eq(r1:0, r3:2)
define i32 @test00(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.C2.cmpeqp(i64 %Rs, i64 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test01
; CHECK: p0 = cmp.gt(r1:0, r3:2)
define i32 @test01(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.C2.cmpgtp(i64 %Rs, i64 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test02
; CHECK: p0 = cmp.gtu(r1:0, r3:2)
define i32 @test02(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.C2.cmpgtup(i64 %Rs, i64 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test10
; CHECK: r0 = cmp.eq(r0, r1)
define i32 @test10(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpeq(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test11
; CHECK: r0 = !cmp.eq(r0, r1)
define i32 @test11(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpneq(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test12
; CHECK: r0 = cmp.eq(r0, #23)
define i32 @test12(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpeqi(i32 %Rs, i32 23)
ret i32 %0
}
; CHECK-LABEL: @test13
; CHECK: r0 = !cmp.eq(r0, #47)
define i32 @test13(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpneqi(i32 %Rs, i32 47)
ret i32 %0
}
; CHECK-LABEL: @test20
; CHECK: p0 = cmpb.eq(r0, r1)
define i32 @test20(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbeq(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test21
; CHECK: p0 = cmpb.gt(r0, r1)
define i32 @test21(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgt(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test22
; CHECK: p0 = cmpb.gtu(r0, r1)
define i32 @test22(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgtu(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test23
; CHECK: p0 = cmpb.eq(r0, #56)
define i32 @test23(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbeqi(i32 %Rs, i32 56)
ret i32 %0
}
; CHECK-LABEL: @test24
; CHECK: p0 = cmpb.gt(r0, #29)
define i32 @test24(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgti(i32 %Rs, i32 29)
ret i32 %0
}
; CHECK-LABEL: @test25
; CHECK: p0 = cmpb.gtu(r0, #111)
define i32 @test25(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgtui(i32 %Rs, i32 111)
ret i32 %0
}
; CHECK-LABEL: @test30
; CHECK: p0 = cmph.eq(r0, r1)
define i32 @test30(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpheq(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test31
; CHECK: p0 = cmph.gt(r0, r1)
define i32 @test31(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgt(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test32
; CHECK: p0 = cmph.gtu(r0, r1)
define i32 @test32(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgtu(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test33
; CHECK: p0 = cmph.eq(r0, #-123)
define i32 @test33(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpheqi(i32 %Rs, i32 -123)
ret i32 %0
}
; CHECK-LABEL: @test34
; CHECK: p0 = cmph.gt(r0, #-3)
define i32 @test34(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgti(i32 %Rs, i32 -3)
ret i32 %0
}
; CHECK-LABEL: @test35
; CHECK: p0 = cmph.gtu(r0, #13)
define i32 @test35(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgtui(i32 %Rs, i32 13)
ret i32 %0
}
; CHECK-LABEL: @test40
; CHECK: r1:0 = vmux(p0, r3:2, r5:4)
define i64 @test40(i32 %Pu, i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.C2.vmux(i32 %Pu, i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test41
; CHECK: p0 = any8(vcmpb.eq(r1:0, r3:2))
define i32 @test41(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.vcmpbeq.any(i64 %Rs, i64 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test50
; CHECK: r1:0 = add(r1:0, r3:2)
define i64 @test50(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.addp(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test51
; CHECK: r1:0 = add(r1:0, r3:2):sat
define i64 @test51(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.addpsat(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test52
; CHECK: r1:0 = sub(r1:0, r3:2)
define i64 @test52(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.subp(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test53
; CHECK: r1:0 = add(r0, r3:2)
define i64 @test53(i32 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.addsp(i32 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test54
; CHECK: r1:0 = and(r1:0, r3:2)
define i64 @test54(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.andp(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test55
; CHECK: r1:0 = or(r1:0, r3:2)
define i64 @test55(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.orp(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test56
; CHECK: r1:0 = xor(r1:0, r3:2)
define i64 @test56(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.xorp(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test57
; CHECK: r1:0 = and(r1:0, ~r3:2)
define i64 @test57(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A4.andnp(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test58
; CHECK: r1:0 = or(r1:0, ~r3:2)
define i64 @test58(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A4.ornp(i64 %Rs, i64 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test60
; CHECK: r0 = add(r0.l, r1.l)
define i32 @test60(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test61
; CHECK: r0 = add(r0.l, r1.h)
define i32 @test61(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test62
; CHECK: r0 = add(r0.l, r1.l):sat
define i32 @test62(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test63
; CHECK: r0 = add(r0.l, r1.h):sat
define i32 @test63(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test64
; CHECK: r0 = add(r0.l, r1.l):<<16
define i32 @test64(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test65
; CHECK: r0 = add(r0.l, r1.h):<<16
define i32 @test65(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.lh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test66
; CHECK: r0 = add(r0.h, r1.l):<<16
define i32 @test66(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test67
; CHECK: r0 = add(r0.h, r1.h):<<16
define i32 @test67(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.hh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test68
; CHECK: r0 = add(r0.l, r1.l):sat:<<16
define i32 @test68(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test69
; CHECK: r0 = add(r0.l, r1.h):sat:<<16
define i32 @test69(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.lh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test6A
; CHECK: r0 = add(r0.h, r1.l):sat:<<16
define i32 @test6A(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test6B
; CHECK: r0 = add(r0.h, r1.h):sat:<<16
define i32 @test6B(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.hh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test70
; CHECK: r0 = sub(r0.l, r1.l)
define i32 @test70(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test71
; CHECK: r0 = sub(r0.l, r1.h)
define i32 @test71(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test72
; CHECK: r0 = sub(r0.l, r1.l):sat
define i32 @test72(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.sat.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test73
; CHECK: r0 = sub(r0.l, r1.h):sat
define i32 @test73(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.sat.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test74
; CHECK: r0 = sub(r0.l, r1.l):<<16
define i32 @test74(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test75
; CHECK: r0 = sub(r0.l, r1.h):<<16
define i32 @test75(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.lh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test76
; CHECK: r0 = sub(r0.h, r1.l):<<16
define i32 @test76(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test77
; CHECK: r0 = sub(r0.h, r1.h):<<16
define i32 @test77(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.hh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test78
; CHECK: r0 = sub(r0.l, r1.l):sat:<<16
define i32 @test78(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.ll(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test79
; CHECK: r0 = sub(r0.l, r1.h):sat:<<16
define i32 @test79(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.lh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test7A
; CHECK: r0 = sub(r0.h, r1.l):sat:<<16
define i32 @test7A(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.hl(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test7B
; CHECK: r0 = sub(r0.h, r1.h):sat:<<16
define i32 @test7B(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.hh(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test90
; CHECK: r0 = and(#1, asl(r0, #2))
define i32 @test90(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.andi.asl.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test91
; CHECK: r0 = or(#1, asl(r0, #2))
define i32 @test91(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.ori.asl.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test92
; CHECK: r0 = add(#1, asl(r0, #2))
define i32 @test92(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.addi.asl.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test93
; CHECK: r0 = sub(#1, asl(r0, #2))
define i32 @test93(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.subi.asl.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test94
; CHECK: r0 = and(#1, lsr(r0, #2))
define i32 @test94(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.andi.lsr.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test95
; CHECK: r0 = or(#1, lsr(r0, #2))
define i32 @test95(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.ori.lsr.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test96
; CHECK: r0 = add(#1, lsr(r0, #2))
define i32 @test96(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.addi.lsr.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test97
; CHECK: r0 = sub(#1, lsr(r0, #2))
define i32 @test97(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.subi.lsr.ri(i32 1, i32 %Rs, i32 2)
ret i32 %0
}
; CHECK-LABEL: @test100
; CHECK: r1:0 = bitsplit(r0, r1)
define i64 @test100(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A4.bitsplit(i32 %Rs, i32 %Rt)
ret i64 %0
}
; CHECK-LABEL: @test101
; CHECK: r0 = modwrap(r0, r1)
define i32 @test101(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %Rs, i32 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test102
; CHECK: r0 = parity(r1:0, r3:2)
define i32 @test102(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S2.parityp(i64 %Rs, i64 %Rt)
ret i32 %0
}
; CHECK-LABEL: @test103
; CHECK: r0 = parity(r0, r1)
define i32 @test103(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.parity(i32 %Rs, i32 %Rt)
ret i32 %0
}
declare i32 @llvm.hexagon.C2.cmpeqp(i64, i64) #1
declare i32 @llvm.hexagon.C2.cmpgtp(i64, i64) #1
declare i32 @llvm.hexagon.C2.cmpgtup(i64, i64) #1
declare i32 @llvm.hexagon.A4.rcmpeq(i32, i32) #1
declare i32 @llvm.hexagon.A4.rcmpneq(i32, i32) #1
declare i32 @llvm.hexagon.A4.rcmpeqi(i32, i32) #1
declare i32 @llvm.hexagon.A4.rcmpneqi(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpbeq(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpbgt(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpbgtu(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpbeqi(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpbgti(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpbgtui(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpheq(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmphgt(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmphgtu(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmpheqi(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmphgti(i32, i32) #1
declare i32 @llvm.hexagon.A4.cmphgtui(i32, i32) #1
declare i64 @llvm.hexagon.C2.vmux(i32, i64, i64) #1
declare i32 @llvm.hexagon.A4.vcmpbeq.any(i64, i64) #1
declare i64 @llvm.hexagon.A2.addp(i64, i64) #1
declare i64 @llvm.hexagon.A2.addpsat(i64, i64) #1
declare i64 @llvm.hexagon.A2.subp(i64, i64) #1
declare i64 @llvm.hexagon.A2.addsp(i32, i64) #1
declare i64 @llvm.hexagon.A2.andp(i64, i64) #1
declare i64 @llvm.hexagon.A2.orp(i64, i64) #1
declare i64 @llvm.hexagon.A2.xorp(i64, i64) #1
declare i64 @llvm.hexagon.A4.ornp(i64, i64) #1
declare i64 @llvm.hexagon.A4.andnp(i64, i64) #1
declare i32 @llvm.hexagon.A2.addh.l16.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.l16.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.l16.sat.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.lh(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.hh(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.sat.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.sat.lh(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.sat.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.addh.h16.sat.hh(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.l16.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.l16.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.l16.sat.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.l16.sat.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.lh(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.hh(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.sat.ll(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.sat.lh(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.sat.hl(i32, i32) #1
declare i32 @llvm.hexagon.A2.subh.h16.sat.hh(i32, i32) #1
declare i64 @llvm.hexagon.A4.bitsplit(i32, i32) #1
declare i32 @llvm.hexagon.A4.modwrapu(i32, i32) #1
declare i32 @llvm.hexagon.S2.parityp(i64, i64) #1
declare i32 @llvm.hexagon.S4.parity(i32, i32) #1
declare i32 @llvm.hexagon.S4.andi.asl.ri(i32, i32, i32) #1
declare i32 @llvm.hexagon.S4.ori.asl.ri(i32, i32, i32) #1
declare i32 @llvm.hexagon.S4.addi.asl.ri(i32, i32, i32) #1
declare i32 @llvm.hexagon.S4.subi.asl.ri(i32, i32, i32) #1
declare i32 @llvm.hexagon.S4.andi.lsr.ri(i32, i32, i32) #1
declare i32 @llvm.hexagon.S4.ori.lsr.ri(i32, i32, i32) #1
declare i32 @llvm.hexagon.S4.addi.lsr.ri(i32, i32, i32) #1
declare i32 @llvm.hexagon.S4.subi.lsr.ri(i32, i32, i32) #1
attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }


@@ -0,0 +1,56 @@
; RUN: llc -march=hexagon -O2 < %s | FileCheck %s
define void @foo(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind optsize {
entry:
%cmp = icmp sgt i32 %n, 100
br i1 %cmp, label %for.body.preheader, label %for.cond4.preheader
; CHECK: endloop0
; CHECK: endloop0
; CHECK-NOT: endloop0
for.body.preheader:
br label %for.body
for.cond4.preheader:
%cmp113 = icmp sgt i32 %n, 0
br i1 %cmp113, label %for.body7.preheader, label %if.end
for.body7.preheader:
br label %for.body7
for.body:
%arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %B, %for.body.preheader ]
%arrayidx3.phi = phi i32* [ %arrayidx3.inc, %for.body ], [ %A, %for.body.preheader ]
%i.014 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%0 = load i32, i32* %arrayidx.phi, align 4
%sub = add nsw i32 %0, -1
store i32 %sub, i32* %arrayidx3.phi, align 4
%inc = add nsw i32 %i.014, 1
%exitcond = icmp eq i32 %inc, %n
%arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
%arrayidx3.inc = getelementptr i32, i32* %arrayidx3.phi, i32 1
br i1 %exitcond, label %if.end.loopexit, label %for.body
for.body7:
%arrayidx8.phi = phi i32* [ %arrayidx8.inc, %for.body7 ], [ %B, %for.body7.preheader ]
%arrayidx9.phi = phi i32* [ %arrayidx9.inc, %for.body7 ], [ %A, %for.body7.preheader ]
%i.117 = phi i32 [ %inc11, %for.body7 ], [ 0, %for.body7.preheader ]
%1 = load i32, i32* %arrayidx8.phi, align 4
%add = add nsw i32 %1, 1
store i32 %add, i32* %arrayidx9.phi, align 4
%inc11 = add nsw i32 %i.117, 1
%exitcond18 = icmp eq i32 %inc11, %n
%arrayidx8.inc = getelementptr i32, i32* %arrayidx8.phi, i32 1
%arrayidx9.inc = getelementptr i32, i32* %arrayidx9.phi, i32 1
br i1 %exitcond18, label %if.end.loopexit21, label %for.body7
if.end.loopexit:
br label %if.end
if.end.loopexit21:
br label %if.end
if.end:
ret void
}