diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 094045596fe..6f0c3076150 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -544,6 +544,13 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, unsigned NumDefs = Desc->getNumDefs(); unsigned NumOps = Desc->getNumOperands(); + // Commuted opcode if available + int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1; + const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev); + + assert(!DescRev || DescRev->getNumDefs() == NumDefs); + assert(!DescRev || DescRev->getNumOperands() == NumOps); + // e64 version if available, -1 otherwise int OpcodeE64 = AMDGPU::getVOPe64(Opcode); const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64); @@ -605,8 +612,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, continue; } - if (i == 1 && Desc->isCommutable() && - fitsRegClass(DAG, Ops[0], RegClass)) { + if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) { unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass; assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass)); @@ -620,6 +626,9 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, SDValue Tmp = Ops[1]; Ops[1] = Ops[0]; Ops[0] = Tmp; + + Desc = DescRev; + DescRev = 0; continue; } } @@ -655,10 +664,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i) Ops.push_back(Node->getOperand(i)); - // Either create a complete new or update the current instruction - if (Promote2e64) - return DAG.getMachineNode(OpcodeE64, Node->getDebugLoc(), - Node->getVTList(), Ops.data(), Ops.size()); - else - return DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + // Create a completely new instruction + return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(), + Node->getVTList(), Ops.data(), Ops.size()); } diff --git 
a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index b9b25b5ecfb..0bfcef562f0 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -158,6 +158,21 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } } +unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const { + + int NewOpc; + + // Try to map original to commuted opcode + if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1) + return NewOpc; + + // Try to map commuted to original opcode + if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1) + return NewOpc; + + return Opcode; +} + MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { @@ -165,7 +180,12 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, !MI->getOperand(2).isReg()) return 0; - return TargetInstrInfo::commuteInstruction(MI, NewMI); + MI = TargetInstrInfo::commuteInstruction(MI, NewMI); + + if (MI) + MI->setDesc(get(commuteOpcode(MI->getOpcode()))); + + return MI; } MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg, diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 5789af5d211..d4e60e50863 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -35,6 +35,8 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; + unsigned commuteOpcode(unsigned Opcode) const; + virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI=false) const; @@ -76,6 +78,8 @@ public: namespace AMDGPU { int getVOPe64(uint16_t Opcode); + int getCommuteRev(uint16_t Opcode); + int getCommuteOrig(uint16_t Opcode); } // End namespace AMDGPU diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 7af1a879df9..617f0b871c2 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -138,6 +138,11 @@ class VOP { string OpName = opName; } +class VOP2_REV { + string RevOp = revOp; + bit IsOrig = isOrig; +} + multiclass VOP1_Helper 
op, RegisterClass drc, RegisterClass src, string opName, list pattern> { @@ -166,11 +171,11 @@ multiclass VOP1_64 op, string opName, list pattern> : VOP1_Helper ; multiclass VOP2_Helper op, RegisterClass vrc, RegisterClass arc, - string opName, list pattern> { + string opName, list pattern, string revOp> { def _e32 : VOP2 < op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName#"_e32 $dst, $src0, $src1", pattern - >, VOP ; + >, VOP , VOP2_REV; def _e64 : VOP3 < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -179,23 +184,26 @@ multiclass VOP2_Helper op, RegisterClass vrc, RegisterClass arc, i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] - >, VOP { + >, VOP , VOP2_REV { let SRC2 = SIOperand.ZERO; } } -multiclass VOP2_32 op, string opName, list pattern> - : VOP2_Helper ; +multiclass VOP2_32 op, string opName, list pattern, + string revOp = opName> + : VOP2_Helper ; -multiclass VOP2_64 op, string opName, list pattern> - : VOP2_Helper ; +multiclass VOP2_64 op, string opName, list pattern, + string revOp = opName> + : VOP2_Helper ; -multiclass VOP2b_32 op, string opName, list pattern> { +multiclass VOP2b_32 op, string opName, list pattern, + string revOp = opName> { def _e32 : VOP2 < op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1), opName#"_e32 $dst, $src0, $src1", pattern - >, VOP ; + >, VOP , VOP2_REV; def _e64 : VOP3b < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -204,7 +212,7 @@ multiclass VOP2b_32 op, string opName, list pattern> { i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] - >, VOP { + >, VOP , VOP2_REV { let SRC2 = SIOperand.ZERO; /* the VOP2 variant puts the carry out into VCC, the VOP3 variant can write it into any SGPR. 
We currently don't use the carry out, @@ -327,4 +335,22 @@ def getVOPe64 : InstrMapping { let ValueCols = [["8"]]; } +// Maps an original opcode to its commuted version +def getCommuteRev : InstrMapping { + let FilterClass = "VOP2_REV"; + let RowFields = ["RevOp"]; + let ColFields = ["IsOrig"]; + let KeyCol = ["1"]; + let ValueCols = [["0"]]; +} + +// Maps a commuted opcode to its original version +def getCommuteOrig : InstrMapping { + let FilterClass = "VOP2_REV"; + let RowFields = ["RevOp"]; + let ColFields = ["IsOrig"]; + let KeyCol = ["0"]; + let ValueCols = [["1"]]; +} + include "SIInstructions.td" diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index b4805212008..40aa0e2d4cb 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -804,13 +804,13 @@ let isCommutable = 1 in { defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))] >; -} // End isCommutable = 1 defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))] >; +defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">; +} // End isCommutable = 1 -defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>; defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>; let isCommutable = 1 in { @@ -848,22 +848,20 @@ defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; -} // End isCommutable = 1 - defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))] >; -defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>; +defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">; + defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))] >; -defm V_ASHRREV_I32 : 
VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; +defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">; + defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))] >; -defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; - -let isCommutable = 1 in { +defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">; defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))] @@ -884,25 +882,24 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; //defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; //defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; //defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; -let Defs = [VCC] in { // Carry-out goes to VCC -let isCommutable = 1 in { +let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; -} // End isCommutable = 1 defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; +defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">; -defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", []>; let Uses = [VCC] in { // Carry-out comes from VCC defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>; defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>; -defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", []>; +defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">; } // End Uses = [VCC] -} // End Defs = [VCC] +} // End isCommutable = 1, Defs = [VCC] + defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>; ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, 
"V_CVT_PKNORM_I16_F32", []>; diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll index 328451c1e73..423adb9da90 100644 --- a/test/CodeGen/R600/lshl.ll +++ b/test/CodeGen/R600/lshl.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -;CHECK: V_LSHL_B32_e64 VGPR0, VGPR0, 1, 0, 0, 0, 0 +;CHECK: V_LSHLREV_B32_e32 VGPR0, 1, VGPR0 define void @test(i32 %p) { %i = mul i32 %p, 2 diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll index 0d3f524b9bb..551eac1d76b 100644 --- a/test/CodeGen/R600/lshr.ll +++ b/test/CodeGen/R600/lshr.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -;CHECK: V_LSHR_B32_e64 VGPR0, VGPR0, 1, 0, 0, 0, 0 +;CHECK: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0 define void @test(i32 %p) { %i = udiv i32 %p, 2 diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll index 979074df2bb..28744e00c3c 100644 --- a/test/CodeGen/R600/mulhu.ll +++ b/test/CodeGen/R600/mulhu.ll @@ -2,7 +2,7 @@ ;CHECK: V_MOV_B32_e32 VGPR1, -1431655765 ;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0 -;CHECK-NEXT: V_LSHR_B32_e64 VGPR0, VGPR0, 1, 0, 0, 0, 0 +;CHECK-NEXT: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0 define void @test(i32 %p) { %i = udiv i32 %p, 3