diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h index 7fdaee59a57..2cbce282cbe 100644 --- a/lib/Target/R600/SIDefines.h +++ b/lib/Target/R600/SIDefines.h @@ -18,7 +18,8 @@ enum { VOP1 = 1 << 5, VOP2 = 1 << 6, VOP3 = 1 << 7, - VOPC = 1 << 8 + VOPC = 1 << 8, + SALU = 1 << 9 }; } diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp index 7f07b01f087..655db5b01da 100644 --- a/lib/Target/R600/SIFixSGPRCopies.cpp +++ b/lib/Target/R600/SIFixSGPRCopies.cpp @@ -65,10 +65,13 @@ /// ultimately led to the creation of an illegal COPY. //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "sgpr-copies" #include "AMDGPU.h" #include "SIInstrInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -79,9 +82,16 @@ class SIFixSGPRCopies : public MachineFunctionPass { private: static char ID; - const TargetRegisterClass *inferRegClass(const TargetRegisterInfo *TRI, + const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI, const MachineRegisterInfo &MRI, - unsigned Reg) const; + unsigned Reg, + unsigned SubReg) const; + const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI, + const MachineRegisterInfo &MRI, + unsigned Reg, + unsigned SubReg) const; + bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI, + const MachineRegisterInfo &MRI) const; public: SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { } @@ -102,25 +112,41 @@ FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) { return new SIFixSGPRCopies(tm); } -/// This functions walks the use/def chains starting with the definition of -/// \p Reg until it finds an Instruction that isn't a COPY returns -/// the register class of that instruction. -const TargetRegisterClass *SIFixSGPRCopies::inferRegClass( - const TargetRegisterInfo *TRI, +static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) { + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + if (!MI.getOperand(i).isReg() || + !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) + continue; + + if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg()))) + return true; + } + return false; +} + +/// This function walks the use list of \p Reg until it finds an instruction +/// that isn't a COPY and returns the register class of that instruction. +/// The \p SubReg parameter narrows the register class of \p Reg to the class +/// of the given sub-register. +const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses( + const SIRegisterInfo *TRI, const MachineRegisterInfo &MRI, - unsigned Reg) const { + unsigned Reg, + unsigned SubReg) const { // The Reg parameter to the function must always be defined by either a PHI // or a COPY, therefore it cannot be a physical register.
assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Reg cannot be a physical register"); const TargetRegisterClass *RC = MRI.getRegClass(Reg); + RC = TRI->getSubRegClass(RC, SubReg); for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), E = MRI.use_end(); I != E; ++I) { switch (I->getOpcode()) { case AMDGPU::COPY: - RC = TRI->getCommonSubClass(RC, inferRegClass(TRI, MRI, - I->getOperand(0).getReg())); + RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI, + I->getOperand(0).getReg(), + I->getOperand(0).getSubReg())); break; } } @@ -128,9 +154,50 @@ const TargetRegisterClass *SIFixSGPRCopies::inferRegClass( return RC; } +const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef( + const SIRegisterInfo *TRI, + const MachineRegisterInfo &MRI, + unsigned Reg, + unsigned SubReg) const { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg); + return TRI->getSubRegClass(RC, SubReg); + } + MachineInstr *Def = MRI.getVRegDef(Reg); + if (Def->getOpcode() != AMDGPU::COPY) { + return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg); + } + + return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(), + Def->getOperand(1).getSubReg()); +} + +bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy, + const SIRegisterInfo *TRI, + const MachineRegisterInfo &MRI) const { + + unsigned DstReg = Copy.getOperand(0).getReg(); + unsigned SrcReg = Copy.getOperand(1).getReg(); + unsigned SrcSubReg = Copy.getOperand(1).getSubReg(); + const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); + + if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || + DstRC == &AMDGPU::M0RegRegClass) + return false; + + const TargetRegisterClass *SrcRC = TRI->getSubRegClass( + MRI.getRegClass(SrcReg), SrcSubReg); + + return TRI->isSGPRClass(DstRC) && + !TRI->getCommonSubClass(DstRC, SrcRC); +} + bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>( + MF.getTarget().getRegisterInfo()); + const SIInstrInfo *TII = static_cast<const SIInstrInfo *>( + MF.getTarget().getInstrInfo()); for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { @@ -138,13 +205,59 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { MachineInstr &MI = *I; - if (MI.getOpcode() != AMDGPU::PHI) { - continue; + if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) { + DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n"); + DEBUG(MI.print(dbgs())); + TII->moveToVALU(MI); + + } + + switch (MI.getOpcode()) { + default: continue; + case AMDGPU::PHI: { + DEBUG(dbgs() << " Fixing PHI:\n"); + DEBUG(MI.print(dbgs())); + + for (unsigned i = 1; i < MI.getNumOperands(); i+=2) { + unsigned Reg = MI.getOperand(i).getReg(); + const TargetRegisterClass *RC = inferRegClassFromDef(TRI, MRI, Reg, + MI.getOperand(0).getSubReg()); + MRI.constrainRegClass(Reg, RC); + } + unsigned Reg = MI.getOperand(0).getReg(); + const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg, + MI.getOperand(0).getSubReg()); + if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) { + MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass); + } + + if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) + break; + + // If a PHI node defines an SGPR and any of its operands are VGPRs, + // then we need to move it to the VALU.
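+      // For example (purely illustrative vreg numbers, not from a real test):
+      //   %vreg3<def> = PHI %vreg1, <BB#0>, %vreg2, <BB#1>
+      // with %vreg1 in VGPR_32 and %vreg3 in SGPR_32 must be moved, because
+      // SALU instructions cannot read VGPRs.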
+ for (unsigned i = 1; i < MI.getNumOperands(); i+=2) { + unsigned Reg = MI.getOperand(i).getReg(); + if (TRI->hasVGPRs(MRI.getRegClass(Reg))) { + TII->moveToVALU(MI); + break; + } + } + + break; + } + case AMDGPU::REG_SEQUENCE: { + if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) || + !hasVGPROperands(MI, TRI)) + continue; + + DEBUG(dbgs() << "Fixing REG_SEQUENCE: \n"); + DEBUG(MI.print(dbgs())); + + TII->moveToVALU(MI); + TII->legalizeOperands(&MI); + break; } - unsigned Reg = MI.getOperand(0).getReg(); - const TargetRegisterClass *RC = inferRegClass(TRI, MRI, Reg); - if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) { - MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass); } } } diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 962e266f215..53ebaaf15a7 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -23,6 +23,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> : field bits<1> VOP2 = 0; field bits<1> VOP3 = 0; field bits<1> VOPC = 0; + field bits<1> SALU = 0; let TSFlags{0} = VM_CNT; let TSFlags{1} = EXP_CNT; @@ -33,6 +34,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> : let TSFlags{6} = VOP2; let TSFlags{7} = VOP3; let TSFlags{8} = VOPC; + let TSFlags{9} = SALU; } class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> : @@ -67,6 +69,7 @@ class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; + let SALU = 1; } class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : @@ -85,6 +88,7 @@ class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; + let SALU = 1; } class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : @@ -102,6 +106,7 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; + let SALU = 1; } class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> : @@ -118,6 +123,7 @@ class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> : let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; + let SALU = 1; } class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 < @@ -135,6 +141,7 @@ class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 < let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; + let SALU = 1; } class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm, diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index c6352294c9a..b8e75cbf118 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -31,6 +31,10 @@ const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const { return RI; } +//===----------------------------------------------------------------------===// +// TargetInstrInfo callbacks +//===----------------------------------------------------------------------===// + void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, @@ -185,11 +189,36 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const { MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { - if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg() || - !MI->getOperand(2).isReg()) + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg()) return 0; - MI = TargetInstrInfo::commuteInstruction(MI, NewMI); + // Cannot commute VOP2 if src0 is SGPR.
+ if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() && + RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg()))) + return 0; + + if (!MI->getOperand(2).isReg()) { + // XXX: Commute instructions with FPImm operands + if (NewMI || MI->getOperand(2).isFPImm() || + (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) { + return 0; + } + + // XXX: Commute VOP3 instructions with abs and neg set. + if (isVOP3(MI->getOpcode()) && + (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(), + AMDGPU::OpName::abs)).getImm() || + MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(), + AMDGPU::OpName::neg)).getImm())) + return 0; + + unsigned Reg = MI->getOperand(1).getReg(); + MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm()); + MI->getOperand(2).ChangeToRegister(Reg, false); + } else { + MI = TargetInstrInfo::commuteInstruction(MI, NewMI); + } if (MI) MI->setDesc(get(commuteOpcode(MI->getOpcode()))); @@ -244,6 +273,10 @@ bool SIInstrInfo::isVOPC(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VOPC; } +bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const { + return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU; +} + bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const { if(MO.isImm()) { return MO.getImm() >= -16 && MO.getImm() <= 64; @@ -310,7 +343,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, // Verify SRC1 for VOP2 and VOPC if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) { const MachineOperand &Src1 = MI->getOperand(Src1Idx); - if (Src1.isImm()) { + if (Src1.isImm() || Src1.isFPImm()) { ErrInfo = "VOP[2C] src1 cannot be an immediate."; return false; } @@ -334,6 +367,232 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, return true; } +unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + default: return AMDGPU::INSTRUCTION_LIST_END; + case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; + case AMDGPU::COPY: return AMDGPU::COPY; + case AMDGPU::PHI: return AMDGPU::PHI; + case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; + case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64; + case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; + case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64; + case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; + case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64; + } +} + +bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const { + return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END; +} + +const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, + unsigned OpNo) const { + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + const MCInstrDesc &Desc = get(MI.getOpcode()); + if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || + Desc.OpInfo[OpNo].RegClass == -1) + return MRI.getRegClass(MI.getOperand(OpNo).getReg()); + + unsigned RCID = Desc.OpInfo[OpNo].RegClass; + return RI.getRegClass(RCID); +} + +bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const { + switch (MI.getOpcode()) { + case AMDGPU::COPY: + case AMDGPU::REG_SEQUENCE: + return RI.hasVGPRs(getOpRegClass(MI, 0)); + default: + return RI.hasVGPRs(getOpRegClass(MI, OpNo)); + } +} + +void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const { + MachineBasicBlock::iterator I = MI; + MachineOperand &MO = MI->getOperand(OpIdx); + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + unsigned RCID = 
get(MI->getOpcode()).OpInfo[OpIdx].RegClass; + const TargetRegisterClass *RC = RI.getRegClass(RCID); + unsigned Opcode = AMDGPU::V_MOV_B32_e32; + if (MO.isReg()) { + Opcode = AMDGPU::COPY; + } else if (RI.isSGPRClass(RC)) { + Opcode = AMDGPU::S_MOV_B32; + } + + unsigned Reg = MRI.createVirtualRegister(RI.getRegClass(RCID)); + BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode), + Reg).addOperand(MO); + MO.ChangeToRegister(Reg, false); +} + +void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), + AMDGPU::OpName::src0); + int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), + AMDGPU::OpName::src1); + int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), + AMDGPU::OpName::src2); + + // Legalize VOP2 + if (isVOP2(MI->getOpcode()) && Src1Idx != -1) { + MachineOperand &Src1 = MI->getOperand(Src1Idx); + // Legalize VOP2 instructions where src1 is not a VGPR. + if (Src1.isImm() || Src1.isFPImm() || + (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) { + if (MI->isCommutable()) { + if (commuteInstruction(MI)) + return; + } + legalizeOpWithMove(MI, Src1Idx); + } + } + + // Legalize VOP3 + if (isVOP3(MI->getOpcode())) { + int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx}; + unsigned SGPRReg = AMDGPU::NoRegister; + for (unsigned i = 0; i < 3; ++i) { + int Idx = VOP3Idx[i]; + if (Idx == -1) + continue; + MachineOperand &MO = MI->getOperand(Idx); + + if (MO.isReg()) { + if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) + continue; // VGPRs are legal + + if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) { + SGPRReg = MO.getReg(); + // We can use one SGPR in each VOP3 instruction. + continue; + } + } else if (!isLiteralConstant(MO)) { + // If it is not a register and not a literal constant, then it must be + // an inline constant which is always legal. + continue; + } + // If we make it this far, then the operand is not legal and we must + // legalize it. + legalizeOpWithMove(MI, Idx); + } + } + + // Legalize REG_SEQUENCE + // The register class of the operands must be the same type as the register + // class of the output. + if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) { + const TargetRegisterClass *RC = NULL, *SRC = NULL, *VRC = NULL; + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) { + if (!MI->getOperand(i).isReg() || + !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg())) + continue; + const TargetRegisterClass *OpRC = + MRI.getRegClass(MI->getOperand(i).getReg()); + if (RI.hasVGPRs(OpRC)) { + VRC = OpRC; + } else { + SRC = OpRC; + } + } + + // If any of the operands are VGPR registers, then they all must be, + // otherwise we will create illegal VGPR->SGPR copies when legalizing + // them. + if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) { + if (!VRC) { + assert(SRC); + VRC = RI.getEquivalentVGPRClass(SRC); + } + RC = VRC; + } else { + RC = SRC; + } + + // Update all the operands so they have the same type.
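+    // For example (purely illustrative vreg numbers):
+    //   %vreg4<def> = REG_SEQUENCE %vreg1, sub0, %vreg2, sub1
+    // with %vreg1 in SGPR_32 and %vreg2 in VGPR_32 becomes all-VGPR: %vreg1
+    // is copied into a fresh VGPR_32 register and the copy's result is used
+    // by the REG_SEQUENCE instead.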
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) { + if (!MI->getOperand(i).isReg() || + !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg())) + continue; + unsigned DstReg = MRI.createVirtualRegister(RC); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + get(AMDGPU::COPY), DstReg) + .addOperand(MI->getOperand(i)); + MI->getOperand(i).setReg(DstReg); + } + } +} + +void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { + SmallVector<MachineInstr *, 128> Worklist; + Worklist.push_back(&TopInst); + + while (!Worklist.empty()) { + MachineInstr *Inst = Worklist.pop_back_val(); + unsigned NewOpcode = getVALUOp(*Inst); + if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) + continue; + + MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo(); + + // Use the new VALU opcode. + const MCInstrDesc &NewDesc = get(NewOpcode); + Inst->setDesc(NewDesc); + + // Add the implicit register uses and definitions. + if (NewDesc.ImplicitUses) { + for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) { + Inst->addOperand(MachineOperand::CreateReg(NewDesc.ImplicitUses[i], + false, true)); + } + } + + if (NewDesc.ImplicitDefs) { + for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) { + Inst->addOperand(MachineOperand::CreateReg(NewDesc.ImplicitDefs[i], + true, true)); + } + } + + legalizeOperands(Inst); + + // Update the destination register class. + const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0); + + switch (Inst->getOpcode()) { + // For generic instructions like COPY, PHI and REG_SEQUENCE, getOpRegClass + // just returns the virtual register class associated with the operand, so + // we need to find an equivalent VGPR register class in order to move the + // instruction to the VALU. + case AMDGPU::COPY: + case AMDGPU::PHI: + case AMDGPU::REG_SEQUENCE: + if (RI.hasVGPRs(NewDstRC)) + continue; + NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); + if (!NewDstRC) + continue; + break; + default: + break; + } + + unsigned DstReg = Inst->getOperand(0).getReg(); + unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC); + MRI.replaceRegWith(DstReg, NewDstReg); + + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg), + E = MRI.use_end(); I != E; ++I) { + MachineInstr &UseMI = *I; + if (!canReadVGPR(UseMI, I.getOperandNo())) { + Worklist.push_back(&UseMI); + } + } + } +} + //===----------------------------------------------------------------------===// // Indirect addressing callbacks //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 72bb25e369f..1ab37861134 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -62,6 +62,42 @@ public: virtual int getIndirectIndexEnd(const MachineFunction &MF) const; + bool isSALUInstr(const MachineInstr &MI) const; + unsigned getVALUOp(const MachineInstr &MI) const; + bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const; + + /// \brief Return the correct register class for \p OpNo. For target-specific + /// instructions, this will return the register class that has been defined + /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return + /// the register class of its machine operand. + const TargetRegisterClass *getOpRegClass(const MachineInstr &MI, + unsigned OpNo) const; + + /// \returns true if it is legal for the operand at index \p OpNo + /// to read a VGPR.
+ bool canReadVGPR(const MachineInstr &MI, unsigned OpNo) const; + + /// \brief Legalize the \p OpIdx operand of this instruction by inserting + /// a MOV. For example: + /// ADD_I32_e32 VGPR0, 15 + /// to + /// MOV VGPR1, 15 + /// ADD_I32_e32 VGPR0, VGPR1 + /// + /// If the operand being legalized is a register, then a COPY will be used + /// instead of MOV. + void legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const; + + /// \brief Legalize all operands in this instruction. This function may + /// create new instructions and insert them before \p MI. + void legalizeOperands(MachineInstr *MI) const; + + /// \brief Replace this instruction's opcode with the equivalent VALU + /// opcode. This function will also move the users of \p MI to the + /// VALU if necessary. + void moveToVALU(MachineInstr &MI) const; + virtual unsigned calculateIndirectAddress(unsigned RegIndex, unsigned Channel) const; diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index ed42a2ad954..b55f59d1618 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -172,6 +172,11 @@ class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 < opName#" $dst, $src0, $src1", pattern >; +class SOP2_SHIFT_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 < + op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1), + opName#" $dst, $src0, $src1", pattern +>; + class SOPC_32 <bits<7> op, string opName, list<dag> pattern> : SOPC < op, (outs SCCReg:$dst), (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", pattern diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 048c1579b66..1823168dfac 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -941,9 +941,13 @@ defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", >; defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">; +let hasPostISelHook = 1 in { + defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", [(set i32:$dst, (shl i32:$src0, i32:$src1))] >; + +} defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">; defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", @@ -1172,12 +1176,31 @@ def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>; def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>; def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>; def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>; -def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>; -def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>; -def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>; -def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>; -def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>; -def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>; + +// Use added complexity so these patterns are preferred to the VALU patterns.
+let AddedComplexity = 1 in { + +def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", + [(set i32:$dst, (shl i32:$src0, i32:$src1))] +>; +def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "S_LSHL_B64", + [(set i64:$dst, (shl i64:$src0, i32:$src1))] +>; +def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", + [(set i32:$dst, (srl i32:$src0, i32:$src1))] +>; +def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "S_LSHR_B64", + [(set i64:$dst, (srl i64:$src0, i32:$src1))] +>; +def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", + [(set i32:$dst, (sra i32:$src0, i32:$src1))] +>; +def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "S_ASHR_I64", + [(set i64:$dst, (sra i64:$src0, i32:$src1))] +>; + +} // End AddedComplexity = 1 + def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>; def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>; def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>; diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 536ac9a7a2b..e06a02257fe 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -72,13 +72,49 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { return NULL; } -bool SIRegisterInfo::isSGPRClass(const TargetRegisterClass *RC) { +bool SIRegisterInfo::isSGPRClass(const TargetRegisterClass *RC) const { if (!RC) { return false; } - return RC == &AMDGPU::SReg_32RegClass || - RC == &AMDGPU::SReg_64RegClass || - RC == &AMDGPU::SReg_128RegClass || - RC == &AMDGPU::SReg_256RegClass || - RC == &AMDGPU::SReg_512RegClass; + return !hasVGPRs(RC); +} + +bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { + return getCommonSubClass(&AMDGPU::VReg_32RegClass, RC) || + getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) || + getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) || + getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) || + getCommonSubClass(&AMDGPU::VReg_512RegClass, RC); +} + +const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( + const TargetRegisterClass *SRC) const { + if (hasVGPRs(SRC)) { + return SRC; + } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) { + return &AMDGPU::VReg_32RegClass; + } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) { + return &AMDGPU::VReg_64RegClass; + } else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) { + return &AMDGPU::VReg_128RegClass; + } else if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass)) { + return &AMDGPU::VReg_256RegClass; + } else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) { + return &AMDGPU::VReg_512RegClass; + } + return NULL; +} + +const TargetRegisterClass *SIRegisterInfo::getSubRegClass( + const TargetRegisterClass *RC, unsigned SubIdx) const { + if (SubIdx == AMDGPU::NoSubRegister) + return RC; + + // If this register has a sub-register, we can safely assume it is a 32-bit + // register, because all of SI's sub-registers are 32-bit.
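+  // For example, getSubRegClass(SReg_64, sub0) is SGPR_32 and
+  // getSubRegClass(VReg_128, sub2) is VGPR_32.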
+ if (isSGPRClass(RC)) { + return &AMDGPU::SGPR_32RegClass; + } else { + return &AMDGPU::VGPR_32RegClass; + } } diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index 0880a898983..ba831b0f77b 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ -47,7 +47,20 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { const TargetRegisterClass *getPhysRegClass(unsigned Reg) const; /// \returns true if this class contains only SGPR registers - static bool isSGPRClass(const TargetRegisterClass *RC); + bool isSGPRClass(const TargetRegisterClass *RC) const; + + /// \returns true if this class contains VGPR registers. + bool hasVGPRs(const TargetRegisterClass *RC) const; + + /// \returns A VGPR reg class with the same width as \p SRC + const TargetRegisterClass *getEquivalentVGPRClass( + const TargetRegisterClass *SRC) const; + + /// \returns The register class that is used for a sub-register of \p RC for + /// the given \p SubIdx. If \p SubIdx equals NoSubRegister, \p RC will + /// be returned. + const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC, + unsigned SubIdx) const; }; } // End namespace llvm diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll index 4a3ecc03176..9446aa8ea9c 100644 --- a/test/CodeGen/R600/fneg.ll +++ b/test/CodeGen/R600/fneg.ll @@ -1,8 +1,23 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK -; CHECK: @fneg_v2 -; CHECK: -PV -; CHECK: -PV +; R600-CHECK-LABEL: @fneg +; R600-CHECK: -PV +; SI-CHECK-LABEL: @fneg +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 +define void @fneg(float addrspace(1)* %out, float %in) { +entry: + %0 = fsub float -0.000000e+00, %in + store float %0, float addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @fneg_v2 +; R600-CHECK: -PV +; R600-CHECK: -PV +; SI-CHECK-LABEL: @fneg_v2 +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 define void @fneg_v2(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) { entry: %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in @@ -31,9 +46,12 @@ entry: ; (fneg (f32 bitcast (i32 a))) => (f32 bitcast (xor (i32 a), 0x80000000)) ; unless the target returns true for isNegFree() -; CHECK-NOT: XOR -; CHECK: -KC0[2].Z - +; R600-CHECK-LABEL: @fneg_free +; R600-CHECK-NOT: XOR +; R600-CHECK: -KC0[2].Z +; SI-CHECK-LABEL: @fneg_free +; XXX: We could use V_ADD_F32_e64 with the negate bit here instead. +; SI-CHECK: V_SUB_F32_e64 v{{[0-9]}}, 0.000000e+00, s{{[0-9]}}, 0, 0, 0, 0 define void @fneg_free(float addrspace(1)* %out, i32 %in) { entry: %0 = bitcast i32 %in to float diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index ca131334175..632509ccccf 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -7,10 +7,10 @@ ;===------------------------------------------------------------------------===; ; Load an i8 value from the global address space.
-; R600-CHECK: @load_i8 +; R600-CHECK-LABEL: @load_i8 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: @load_i8 +; SI-CHECK-LABEL: @load_i8 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { %1 = load i8 addrspace(1)* %in @@ -19,13 +19,13 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { ret void } -; R600-CHECK: @load_i8_sext +; R600-CHECK-LABEL: @load_i8_sext ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] ; R600-CHECK: 24 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] ; R600-CHECK: 24 -; SI-CHECK: @load_i8_sext +; SI-CHECK-LABEL: @load_i8_sext ; SI-CHECK: BUFFER_LOAD_SBYTE define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: @@ -35,10 +35,10 @@ entry: ret void } -; R600-CHECK: @load_v2i8 +; R600-CHECK-LABEL: @load_v2i8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 -; SI-CHECK: @load_v2i8 +; SI-CHECK-LABEL: @load_v2i8 ; SI-CHECK: BUFFER_LOAD_UBYTE ; SI-CHECK: BUFFER_LOAD_UBYTE define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) { @@ -49,7 +49,7 @@ entry: ret void } -; R600-CHECK: @load_v2i8_sext +; R600-CHECK-LABEL: @load_v2i8_sext ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] @@ -60,7 +60,7 @@ entry: ; R600-CHECK-DAG: 24 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] ; R600-CHECK-DAG: 24 -; SI-CHECK: @load_v2i8_sext +; SI-CHECK-LABEL: @load_v2i8_sext ; SI-CHECK: BUFFER_LOAD_SBYTE ; SI-CHECK: BUFFER_LOAD_SBYTE define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) { @@ -71,12 +71,12 @@ entry: ret void } -; R600-CHECK: @load_v4i8 +; R600-CHECK-LABEL: @load_v4i8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 -; SI-CHECK: @load_v4i8 +; SI-CHECK-LABEL: @load_v4i8 ; SI-CHECK: BUFFER_LOAD_UBYTE ; SI-CHECK: BUFFER_LOAD_UBYTE ; SI-CHECK: BUFFER_LOAD_UBYTE @@ -89,7 +89,7 @@ entry: ret void } -; R600-CHECK: @load_v4i8_sext +; R600-CHECK-LABEL: @load_v4i8_sext ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] ; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] @@ -110,7 +110,7 @@ entry: ; R600-CHECK-DAG: 24 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]] ; R600-CHECK-DAG: 24 -; SI-CHECK: @load_v4i8_sext +; SI-CHECK-LABEL: @load_v4i8_sext ; SI-CHECK: BUFFER_LOAD_SBYTE ; SI-CHECK: BUFFER_LOAD_SBYTE ; SI-CHECK: BUFFER_LOAD_SBYTE @@ -124,9 +124,9 @@ entry: } ; Load an i16 value from the global address space. 
-; R600-CHECK: @load_i16 +; R600-CHECK-LABEL: @load_i16 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: @load_i16 +; SI-CHECK-LABEL: @load_i16 ; SI-CHECK: BUFFER_LOAD_USHORT define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { entry: @@ -136,13 +136,13 @@ entry: ret void } -; R600-CHECK: @load_i16_sext +; R600-CHECK-LABEL: @load_i16_sext ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]] ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] ; R600-CHECK: 16 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] ; R600-CHECK: 16 -; SI-CHECK: @load_i16_sext +; SI-CHECK-LABEL: @load_i16_sext ; SI-CHECK: BUFFER_LOAD_SSHORT define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { entry: @@ -152,10 +152,10 @@ entry: ret void } -; R600-CHECK: @load_v2i16 +; R600-CHECK-LABEL: @load_v2i16 ; R600-CHECK: VTX_READ_16 ; R600-CHECK: VTX_READ_16 -; SI-CHECK: @load_v2i16 +; SI-CHECK-LABEL: @load_v2i16 ; SI-CHECK: BUFFER_LOAD_USHORT ; SI-CHECK: BUFFER_LOAD_USHORT define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { @@ -166,7 +166,7 @@ entry: ret void } -; R600-CHECK: @load_v2i16_sext +; R600-CHECK-LABEL: @load_v2i16_sext ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] @@ -177,7 +177,7 @@ entry: ; R600-CHECK-DAG: 16 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] ; R600-CHECK-DAG: 16 -; SI-CHECK: @load_v2i16_sext +; SI-CHECK-LABEL: @load_v2i16_sext ; SI-CHECK: BUFFER_LOAD_SSHORT ; SI-CHECK: BUFFER_LOAD_SSHORT define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { @@ -188,12 +188,12 @@ entry: ret void } -; R600-CHECK: @load_v4i16 +; R600-CHECK-LABEL: @load_v4i16 ; R600-CHECK: VTX_READ_16 ; R600-CHECK: VTX_READ_16 ; R600-CHECK: VTX_READ_16 ; R600-CHECK: VTX_READ_16 -; SI-CHECK: @load_v4i16 +; SI-CHECK-LABEL: @load_v4i16 ; SI-CHECK: BUFFER_LOAD_USHORT ; SI-CHECK: BUFFER_LOAD_USHORT ; SI-CHECK: BUFFER_LOAD_USHORT @@ -206,7 +206,7 @@ entry: ret void } -; R600-CHECK: @load_v4i16_sext +; R600-CHECK-LABEL: @load_v4i16_sext ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] ; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] @@ -227,7 +227,7 @@ entry: ; R600-CHECK-DAG: 16 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]] ; R600-CHECK-DAG: 16 -; SI-CHECK: @load_v4i16_sext +; SI-CHECK-LABEL: @load_v4i16_sext ; SI-CHECK: BUFFER_LOAD_SSHORT ; SI-CHECK: BUFFER_LOAD_SSHORT ; SI-CHECK: BUFFER_LOAD_SSHORT @@ -241,10 +241,10 @@ entry: } ; load an i32 value from the global address space. -; R600-CHECK: @load_i32 +; R600-CHECK-LABEL: @load_i32 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 -; SI-CHECK: @load_i32 +; SI-CHECK-LABEL: @load_i32 ; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}} define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: @@ -254,10 +254,10 @@ entry: } ; load a f32 value from the global address space. 
-; R600-CHECK: @load_f32 +; R600-CHECK-LABEL: @load_f32 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 -; SI-CHECK: @load_f32 +; SI-CHECK-LABEL: @load_f32 ; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}} define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) { entry: @@ -267,10 +267,10 @@ entry: } ; load a v2f32 value from the global address space -; R600-CHECK: @load_v2f32 +; R600-CHECK-LABEL: @load_v2f32 ; R600-CHECK: VTX_READ_64 -; SI-CHECK: @load_v2f32 +; SI-CHECK-LABEL: @load_v2f32 ; SI-CHECK: BUFFER_LOAD_DWORDX2 define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) { entry: @@ -279,11 +279,11 @@ entry: ret void } -; R600-CHECK: @load_i64 +; R600-CHECK-LABEL: @load_i64 ; R600-CHECK: MEM_RAT ; R600-CHECK: MEM_RAT -; SI-CHECK: @load_i64 +; SI-CHECK-LABEL: @load_i64 ; SI-CHECK: BUFFER_LOAD_DWORDX2 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { entry: @@ -292,12 +292,12 @@ entry: ret void } -; R600-CHECK: @load_i64_sext +; R600-CHECK-LABEL: @load_i64_sext ; R600-CHECK: MEM_RAT ; R600-CHECK: MEM_RAT ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x ; R600-CHECK: 31 -; SI-CHECK: @load_i64_sext +; SI-CHECK-LABEL: @load_i64_sext ; SI-CHECK: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]:[0-9]\]]] ; SI-CHECK: V_LSHL_B64 [[LSHL:v\[[0-9]:[0-9]\]]], [[VAL]], 32 ; SI-CHECK: V_ASHR_I64 v{{\[[0-9]:[0-9]\]}}, [[LSHL]], 32 @@ -310,7 +310,7 @@ entry: ret void } -; R600-CHECK: @load_i64_zext +; R600-CHECK-LABEL: @load_i64_zext ; R600-CHECK: MEM_RAT ; R600-CHECK: MEM_RAT define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { @@ -326,13 +326,13 @@ entry: ;===------------------------------------------------------------------------===; ; Load a sign-extended i8 value -; R600-CHECK: @load_const_i8_sext +; R600-CHECK-LABEL: @load_const_i8_sext ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] ; R600-CHECK: 24 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] ; R600-CHECK: 24 -; SI-CHECK: @load_const_i8_sext +; SI-CHECK-LABEL: @load_const_i8_sext ; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}}, define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { entry: @@ -343,9 +343,9 @@ entry: } ; Load an aligned i8 value -; R600-CHECK: @load_const_i8_aligned +; R600-CHECK-LABEL: @load_const_i8_aligned ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: @load_const_i8_aligned +; SI-CHECK-LABEL: @load_const_i8_aligned ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { entry: @@ -356,9 +356,9 @@ entry: } ; Load an un-aligned i8 value -; R600-CHECK: @load_const_i8_unaligned +; R600-CHECK-LABEL: @load_const_i8_unaligned ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: @load_const_i8_unaligned +; SI-CHECK-LABEL: @load_const_i8_unaligned ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { entry: @@ -370,13 +370,13 @@ entry: } ; Load a sign-extended i16 value -; R600-CHECK: @load_const_i16_sext +; R600-CHECK-LABEL: @load_const_i16_sext ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]] ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] ; R600-CHECK: 16 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] ; R600-CHECK: 16 -; SI-CHECK: @load_const_i16_sext +; SI-CHECK-LABEL: @load_const_i16_sext 
; SI-CHECK: BUFFER_LOAD_SSHORT define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { entry: @@ -387,9 +387,9 @@ entry: } ; Load an aligned i16 value -; R600-CHECK: @load_const_i16_aligned +; R600-CHECK-LABEL: @load_const_i16_aligned ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: @load_const_i16_aligned +; SI-CHECK-LABEL: @load_const_i16_aligned ; SI-CHECK: BUFFER_LOAD_USHORT define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { entry: @@ -400,9 +400,9 @@ entry: } ; Load an un-aligned i16 value -; R600-CHECK: @load_const_i16_unaligned +; R600-CHECK-LABEL: @load_const_i16_unaligned ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: @load_const_i16_unaligned +; SI-CHECK-LABEL: @load_const_i16_unaligned ; SI-CHECK: BUFFER_LOAD_USHORT define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { entry: @@ -414,10 +414,10 @@ entry: } ; Load an i32 value from the constant address space. -; R600-CHECK: @load_const_addrspace_i32 +; R600-CHECK-LABEL: @load_const_addrspace_i32 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 -; SI-CHECK: @load_const_addrspace_i32 +; SI-CHECK-LABEL: @load_const_addrspace_i32 ; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}} define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { entry: @@ -427,10 +427,10 @@ entry: } ; Load a f32 value from the constant address space. -; R600-CHECK: @load_const_addrspace_f32 +; R600-CHECK-LABEL: @load_const_addrspace_f32 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 -; SI-CHECK: @load_const_addrspace_f32 +; SI-CHECK-LABEL: @load_const_addrspace_f32 ; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}} define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) { %1 = load float addrspace(2)* %in @@ -443,9 +443,9 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace( ;===------------------------------------------------------------------------===; ; Load an i8 value from the local address space. 
-; R600-CHECK: @load_i8_local +; R600-CHECK-LABEL: @load_i8_local ; R600-CHECK: LDS_UBYTE_READ_RET -; SI-CHECK: @load_i8_local +; SI-CHECK-LABEL: @load_i8_local ; SI-CHECK-NOT: S_WQM_B64 ; SI-CHECK: DS_READ_U8 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { @@ -455,10 +455,10 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { ret void } -; R600-CHECK: @load_i8_sext_local +; R600-CHECK-LABEL: @load_i8_sext_local ; R600-CHECK: LDS_UBYTE_READ_RET ; R600-CHECK: ASHR -; SI-CHECK: @load_i8_sext_local +; SI-CHECK-LABEL: @load_i8_sext_local ; SI-CHECK-NOT: S_WQM_B64 ; SI-CHECK: DS_READ_I8 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { @@ -469,10 +469,10 @@ entry: ret void } -; R600-CHECK: @load_v2i8_local +; R600-CHECK-LABEL: @load_v2i8_local ; R600-CHECK: LDS_UBYTE_READ_RET ; R600-CHECK: LDS_UBYTE_READ_RET -; SI-CHECK: @load_v2i8_local +; SI-CHECK-LABEL: @load_v2i8_local ; SI-CHECK-NOT: S_WQM_B64 ; SI-CHECK: DS_READ_U8 ; SI-CHECK: DS_READ_U8 @@ -484,12 +484,12 @@ entry: ret void } -; R600-CHECK: @load_v2i8_sext_local +; R600-CHECK-LABEL: @load_v2i8_sext_local ; R600-CHECK-DAG: LDS_UBYTE_READ_RET ; R600-CHECK-DAG: LDS_UBYTE_READ_RET ; R600-CHECK-DAG: ASHR ; R600-CHECK-DAG: ASHR -; SI-CHECK: @load_v2i8_sext_local +; SI-CHECK-LABEL: @load_v2i8_sext_local ; SI-CHECK-NOT: S_WQM_B64 ; SI-CHECK: DS_READ_I8 ; SI-CHECK: DS_READ_I8 @@ -501,12 +501,12 @@ entry: ret void } -; R600-CHECK: @load_v4i8_local +; R600-CHECK-LABEL: @load_v4i8_local ; R600-CHECK: LDS_UBYTE_READ_RET ; R600-CHECK: LDS_UBYTE_READ_RET ; R600-CHECK: LDS_UBYTE_READ_RET ; R600-CHECK: LDS_UBYTE_READ_RET -; SI-CHECK: @load_v4i8_local +; SI-CHECK-LABEL: @load_v4i8_local ; SI-CHECK-NOT: S_WQM_B64 ; SI-CHECK: DS_READ_U8 ; SI-CHECK: DS_READ_U8 @@ -520,7 +520,7 @@ entry: ret void } -; R600-CHECK: @load_v4i8_sext_local +; R600-CHECK-LABEL: @load_v4i8_sext_local ; R600-CHECK-DAG: LDS_UBYTE_READ_RET ; R600-CHECK-DAG: LDS_UBYTE_READ_RET ; R600-CHECK-DAG: LDS_UBYTE_READ_RET @@ -529,7 +529,7 @@ entry: ; R600-CHECK-DAG: ASHR ; R600-CHECK-DAG: ASHR ; R600-CHECK-DAG: ASHR -; SI-CHECK: @load_v4i8_sext_local +; SI-CHECK-LABEL: @load_v4i8_sext_local ; SI-CHECK-NOT: S_WQM_B64 ; SI-CHECK: DS_READ_I8 ; SI-CHECK: DS_READ_I8 @@ -544,9 +544,9 @@ entry: } ; Load an i16 value from the local address space. 
-; R600-CHECK: @load_i16_local +; R600-CHECK-LABEL: @load_i16_local ; R600-CHECK: LDS_USHORT_READ_RET -; SI-CHECK: @load_i16_local +; SI-CHECK-LABEL: @load_i16_local ; SI-CHECK-NOT: S_WQM_B64 ; SI-CHECK: DS_READ_U16 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { @@ -557,10 +557,10 @@ entry: ret void } -; R600-CHECK: @load_i16_sext_local +; R600-CHECK-LABEL: @load_i16_sext_local ; R600-CHECK: LDS_USHORT_READ_RET ; R600-CHECK: ASHR -; SI-CHECK: @load_i16_sext_local +; SI-CHECK-LABEL: @load_i16_sext_local ; SI-CHECK-NOT: S_WQM_B64 ; SI-CHECK: DS_READ_I16 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { @@ -571,10 +571,10 @@ entry: ret void } -; R600-CHECK: @load_v2i16_local +; R600-CHECK-LABEL: @load_v2i16_local ; R600-CHECK: LDS_USHORT_READ_RET ; R600-CHECK: LDS_USHORT_READ_RET -; SI-CHECK: @load_v2i16_local +; SI-CHECK-LABEL: @load_v2i16_local ; SI-CHECK-NOT: S_WQM_B64 ; SI-CHECK: DS_READ_U16 ; SI-CHECK: DS_READ_U16 @@ -586,12 +586,12 @@ entry: ret void } -; R600-CHECK: @load_v2i16_sext_local +; R600-CHECK-LABEL: @load_v2i16_sext_local ; R600-CHECK-DAG: LDS_USHORT_READ_RET ; R600-CHECK-DAG: LDS_USHORT_READ_RET ; R600-CHECK-DAG: ASHR ; R600-CHECK-DAG: ASHR -; SI-CHECK: @load_v2i16_sext_local +; SI-CHECK-LABEL: @load_v2i16_sext_local ; SI-CHECK-NOT: S_WQM_B64 ; SI-CHECK: DS_READ_I16 ; SI-CHECK: DS_READ_I16 @@ -603,12 +603,12 @@ entry: ret void } -; R600-CHECK: @load_v4i16_local +; R600-CHECK-LABEL: @load_v4i16_local ; R600-CHECK: LDS_USHORT_READ_RET ; R600-CHECK: LDS_USHORT_READ_RET ; R600-CHECK: LDS_USHORT_READ_RET ; R600-CHECK: LDS_USHORT_READ_RET -; SI-CHECK: @load_v4i16_local +; SI-CHECK-LABEL: @load_v4i16_local ; SI-CHECK-NOT: S_WQM_B64 ; SI-CHECK: DS_READ_U16 ; SI-CHECK: DS_READ_U16 ; SI-CHECK: DS_READ_U16 ; SI-CHECK: DS_READ_U16 @@ -622,7 +622,7 @@ entry: ret void } -; R600-CHECK: @load_v4i16_sext_local +; R600-CHECK-LABEL: @load_v4i16_sext_local ; R600-CHECK-DAG: LDS_USHORT_READ_RET ; R600-CHECK-DAG: LDS_USHORT_READ_RET ; R600-CHECK-DAG: LDS_USHORT_READ_RET @@ -631,7 +631,7 @@ entry: ; R600-CHECK-DAG: ASHR ; R600-CHECK-DAG: ASHR ; R600-CHECK-DAG: ASHR -; SI-CHECK: @load_v4i16_sext_local +; SI-CHECK-LABEL: @load_v4i16_sext_local ; SI-CHECK-NOT: S_WQM_B64 ; SI-CHECK: DS_READ_I16 ; SI-CHECK: DS_READ_I16 @@ -646,9 +646,9 @@ entry: } ; load an i32 value from the global address space. -; R600-CHECK: @load_i32_local +; R600-CHECK-LABEL: @load_i32_local ; R600-CHECK: LDS_READ_RET -; SI-CHECK: @load_i32_local +; SI-CHECK-LABEL: @load_i32_local ; SI-CHECK-NOT: S_WQM_B64 ; SI-CHECK: DS_READ_B32 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { @@ -659,9 +659,9 @@ entry: ret void } ; load a f32 value from the global address space.
-; R600-CHECK: @load_f32_local +; R600-CHECK-LABEL: @load_f32_local ; R600-CHECK: LDS_READ_RET -; SI-CHECK: @load_f32_local +; SI-CHECK-LABEL: @load_f32_local ; SI-CHECK: DS_READ_B32 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) { entry: @@ -671,10 +671,10 @@ entry: } ; load a v2f32 value from the local address space -; R600-CHECK: @load_v2f32_local +; R600-CHECK-LABEL: @load_v2f32_local ; R600-CHECK: LDS_READ_RET ; R600-CHECK: LDS_READ_RET -; SI-CHECK: @load_v2f32_local +; SI-CHECK-LABEL: @load_v2f32_local ; SI-CHECK: DS_READ_B32 ; SI-CHECK: DS_READ_B32 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) { diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll index 75ad5809bce..21628391000 100644 --- a/test/CodeGen/R600/lshl.ll +++ b/test/CodeGen/R600/lshl.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s -;CHECK: V_LSHL_B32_e64 v{{[0-9]}}, s{{[0-9]}}, 1 +;CHECK: S_LSHL_B32 s{{[0-9]}}, s{{[0-9]}}, 1 define void @test(i32 %p) { %i = mul i32 %p, 2 diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll index 7a0a526aa5d..886d1c4854d 100644 --- a/test/CodeGen/R600/lshr.ll +++ b/test/CodeGen/R600/lshr.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s -;CHECK: V_LSHR_B32_e64 {{v[0-9]}}, s{{[0-9]}}, 1 +;CHECK: S_LSHR_B32 s{{[0-9]}}, s{{[0-9]}}, 1 define void @test(i32 %p) { %i = udiv i32 %p, 2 diff --git a/test/CodeGen/R600/mad_uint24.ll b/test/CodeGen/R600/mad_uint24.ll index 71fcb6d5d02..66a070ed9d4 100644 --- a/test/CodeGen/R600/mad_uint24.ll +++ b/test/CodeGen/R600/mad_uint24.ll @@ -2,9 +2,9 @@ ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK ; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK -; EG-CHECK: @u32_mad24 +; EG-CHECK-LABEL: @u32_mad24 ; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W, KC0[3].X -; SI-CHECK: @u32_mad24 +; SI-CHECK-LABEL: @u32_mad24 ; SI-CHECK: V_MAD_U32_U24 define void @u32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { @@ -19,7 +19,7 @@ entry: ret void } -; EG-CHECK: @i16_mad24 +; EG-CHECK-LABEL: @i16_mad24 ; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40 ; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44 ; EG-CHECK-DAG: VTX_READ_16 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48 @@ -30,7 +30,7 @@ entry: ; EG-CHECK: 16 ; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x ; EG-CHECK: 16 -; SI-CHECK: @i16_mad24 +; SI-CHECK-LABEL: @i16_mad24 ; SI-CHECK: V_MAD_U32_U24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} ; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 16, [[MAD]] ; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 16, [[LSHL]] @@ -44,7 +44,7 @@ entry: ret void } -; EG-CHECK: @i8_mad24 +; EG-CHECK-LABEL: @i8_mad24 ; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40 ; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44 ; EG-CHECK-DAG: VTX_READ_8 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48 @@ -55,7 +55,7 @@ entry: ; EG-CHECK: 24 ; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x ; EG-CHECK: 24 -; SI-CHECK: @i8_mad24 +; SI-CHECK-LABEL: @i8_mad24 ; SI-CHECK: V_MAD_U32_U24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} ; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 24, [[MUL]] ; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 24, [[LSHL]] diff --git a/test/CodeGen/R600/mul_uint24.ll b/test/CodeGen/R600/mul_uint24.ll index 
3729c81df5e..6e6d5496789 100644 --- a/test/CodeGen/R600/mul_uint24.ll +++ b/test/CodeGen/R600/mul_uint24.ll @@ -2,9 +2,9 @@ ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK ; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK -; EG-CHECK: @u32_mul24 +; EG-CHECK-LABEL: @u32_mul24 ; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W -; SI-CHECK: @u32_mul24 +; SI-CHECK-LABEL: @u32_mul24 ; SI-CHECK: V_MUL_U32_U24 define void @u32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) { @@ -18,7 +18,7 @@ entry: ret void } -; EG-CHECK: @i16_mul24 +; EG-CHECK-LABEL: @i16_mul24 ; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40 ; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44 ; The order of A and B does not matter. @@ -28,7 +28,7 @@ entry: ; EG-CHECK: 16 ; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x ; EG-CHECK: 16 -; SI-CHECK: @i16_mul24 +; SI-CHECK-LABEL: @i16_mul24 ; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} ; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 16, [[MUL]] ; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 16, [[LSHL]] @@ -41,7 +41,7 @@ entry: ret void } -; EG-CHECK: @i8_mul24 +; EG-CHECK-LABEL: @i8_mul24 ; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40 ; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44 ; The order of A and B does not matter. @@ -51,7 +51,7 @@ entry: ; EG-CHECK: 24 ; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x ; EG-CHECK: 24 -; SI-CHECK: @i8_mul24 +; SI-CHECK-LABEL: @i8_mul24 ; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} ; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 24, [[MUL]] ; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 24, [[LSHL]] diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll index 2190b3c2877..292616394b7 100644 --- a/test/CodeGen/R600/sra.ll +++ b/test/CodeGen/R600/sra.ll @@ -1,11 +1,11 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s -;EG-CHECK: @ashr_v2i32 +;EG-CHECK-LABEL: @ashr_v2i32 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @ashr_v2i32 +;SI-CHECK-LABEL: @ashr_v2i32 ;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} @@ -18,13 +18,13 @@ define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i ret void } -;EG-CHECK: @ashr_v4i32 +;EG-CHECK-LABEL: @ashr_v4i32 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: ASHR {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @ashr_v4i32 +;SI-CHECK-LABEL: @ashr_v4i32 ;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} @@ -39,10 +39,10 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i ret void } -;EG-CHECK: @ashr_i64 +;EG-CHECK-LABEL: @ashr_i64 ;EG-CHECK: ASHR -;SI-CHECK: @ashr_i64 +;SI-CHECK-LABEL: @ashr_i64 ;SI-CHECK: V_ASHR_I64 define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) { entry: diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll index 5ccccc8f88f..0bd320ad9ce 100644 --- a/test/CodeGen/R600/trunc.ll +++ b/test/CodeGen/R600/trunc.ll @@ -19,7 +19,8 @@ define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) { ; SI-LABEL: @trunc_shl_i64: ; SI: S_LOAD_DWORDX2 ; SI: S_LOAD_DWORDX2 [[SREG:s\[[0-9]+:[0-9]+\]]] -; SI: V_LSHL_B64 v{{\[}}[[LO_VREG:[0-9]+]]:{{[0-9]+\]}}, [[SREG]], 2 +; SI: S_LSHL_B64 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, [[SREG]], 2 +; SI: MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] ; SI: BUFFER_STORE_DWORD v[[LO_VREG]], define void @trunc_shl_i64(i32 addrspace(1)* %out, i64 %a) { %b = shl i64 %a, 2
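
The heart of this patch is the worklist rewrite in SIInstrInfo::moveToVALU(): once one SALU instruction is switched to its VALU form, its result lives in a VGPR, so every user that cannot read a VGPR must be switched as well, transitively. A minimal standalone C++ sketch of that propagation pattern (hypothetical Instr struct and made-up opcode numbers, not the LLVM API):

    #include <vector>

    // Hypothetical stand-ins for MachineInstr and the SALU->VALU opcode table.
    struct Instr {
      unsigned Opcode;
      std::vector<Instr *> Users; // instructions that read this one's result
      bool CanReadVGPR;           // mirrors SIInstrInfo::canReadVGPR()
    };

    // Returns 0 when no VALU equivalent exists, the way getVALUOp() returns
    // AMDGPU::INSTRUCTION_LIST_END in the patch.
    unsigned lookupVALUOpcode(unsigned SALUOpcode) {
      switch (SALUOpcode) {
      case 1:  return 101; // e.g. S_LSHL_B32 -> V_LSHL_B32_e32
      default: return 0;
      }
    }

    void moveTreeToVALU(Instr &Top) {
      std::vector<Instr *> Worklist{&Top};
      while (!Worklist.empty()) {
        Instr *I = Worklist.back();
        Worklist.pop_back();
        unsigned NewOp = lookupVALUOpcode(I->Opcode);
        if (NewOp == 0)
          continue;        // no VALU form; leave the instruction alone
        I->Opcode = NewOp; // the result now lives in a VGPR...
        for (Instr *U : I->Users)
          if (!U->CanReadVGPR) // ...so users that can't read one move too
            Worklist.push_back(U);
      }
    }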