diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 8a3ab46ad22..417356d800f 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -1648,57 +1648,6 @@ bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op, return TRI->getRegClass(RegClass)->hasSubClassEq(RC); } -/// \brief Make sure that we don't exeed the number of allowed scalars -void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand, - unsigned RegClass, - bool &ScalarSlotUsed) const { - - if (!isVSrc(RegClass)) - return; - - // First map the operands register class to a destination class - switch (RegClass) { - case AMDGPU::VSrc_32RegClassID: - case AMDGPU::VCSrc_32RegClassID: - RegClass = AMDGPU::VReg_32RegClassID; - break; - case AMDGPU::VSrc_64RegClassID: - case AMDGPU::VCSrc_64RegClassID: - RegClass = AMDGPU::VReg_64RegClassID; - break; - default: - llvm_unreachable("Unknown vsrc reg class"); - } - - // Nothing to do if they fit naturally - if (fitsRegClass(DAG, Operand, RegClass)) - return; - - // If the scalar slot isn't used yet use it now - if (!ScalarSlotUsed) { - ScalarSlotUsed = true; - return; - } - - // This is a conservative aproach. It is possible that we can't determine the - // correct register class and copy too often, but better safe than sorry. - - SDNode *Node; - // We can't use COPY_TO_REGCLASS with FrameIndex arguments. - if (isa(Operand) || - isa(Operand)) { - unsigned Opcode = Operand.getValueType() == MVT::i32 ? 
- AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; - Node = DAG.getMachineNode(Opcode, SDLoc(), Operand.getValueType(), - Operand); - } else { - SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32); - Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(), - Operand.getValueType(), Operand, RC); - } - Operand = SDValue(Node, 0); -} - /// \returns true if \p Node's operands are different from the SDValue list /// \p Ops static bool isNodeChanged(const SDNode *Node, const std::vector &Ops) { @@ -1710,8 +1659,9 @@ static bool isNodeChanged(const SDNode *Node, const std::vector &Ops) { return false; } -/// \brief Try to commute instructions and insert copies in order to satisfy the -/// operand constraints. +/// TODO: This needs to be removed. Its current primary purpose is to fold +/// immediates into operands when legal. The legalization parts are redundant +/// with SIInstrInfo::legalizeOperands which is called in a post-isel hook. SDNode *SITargetLowering::legalizeOperands(MachineSDNode *Node, SelectionDAG &DAG) const { // Original encoding (either e32 or e64) @@ -1784,11 +1734,9 @@ SDNode *SITargetLowering::legalizeOperands(MachineSDNode *Node, // Is this a VSrc or SSrc operand? unsigned RegClass = Desc->OpInfo[Op].RegClass; if (isVSrc(RegClass) || isSSrc(RegClass)) { - // Try to fold the immediates - if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) { - // Folding didn't work, make sure we don't hit the SReg limit. - ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed); - } + // Try to fold the immediates. If this ends up with multiple constant bus + // uses, it will be legalized later. 
+ foldImm(Ops[i], Immediate, ScalarSlotUsed); continue; } @@ -1938,6 +1886,8 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, const SIInstrInfo *TII = static_cast( getTargetMachine().getSubtargetImpl()->getInstrInfo()); + TII->legalizeOperands(MI); + if (TII->isMIMG(MI->getOpcode())) { unsigned VReg = MI->getOperand(0).getReg(); unsigned Writemask = MI->getOperand(1).getImm(); diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index f953b482fdd..9cf4dbcb2fd 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -47,8 +47,6 @@ class SITargetLowering : public AMDGPUTargetLowering { const SDValue &Op) const; bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op, unsigned RegClass) const; - void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand, - unsigned RegClass, bool &ScalarSlotUsed) const; SDNode *legalizeOperands(MachineSDNode *N, SelectionDAG &DAG) const; void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 8369a0c8687..c1fc4b3a974 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -42,6 +42,10 @@ class InstSI pattern> : let TSFlags{10} = MUBUF; let TSFlags{11} = MTBUF; let TSFlags{12} = FLAT; + + // Most instructions require adjustments after selection to satisfy + // operand requirements. + let hasPostISelHook = 1; } class Enc32 { diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index fb5dad13845..ed8d9793af7 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -1394,20 +1394,39 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx }; - // First we need to consider the instruction's operand requirements before - // legalizing. 
Some operands are required to be SGPRs, but we are still - // bound by the constant bus requirement to only use one. - // - // If the operand's class is an SGPR, we can never move it. - for (unsigned i = 0; i < 3; ++i) { - int Idx = VOP3Idx[i]; - if (Idx == -1) - break; + for (const MachineOperand &MO : MI->implicit_operands()) { + // We only care about reads. + if (MO.isDef()) + continue; - if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) { - SGPRReg = MI->getOperand(Idx).getReg(); + if (MO.getReg() == AMDGPU::VCC) { + SGPRReg = AMDGPU::VCC; break; } + + if (MO.getReg() == AMDGPU::FLAT_SCR) { + SGPRReg = AMDGPU::FLAT_SCR; + break; + } + } + + + if (SGPRReg == AMDGPU::NoRegister) { + // First we need to consider the instruction's operand requirements before + // legalizing. Some operands are required to be SGPRs, but we are still + // bound by the constant bus requirement to only use one. + // + // If the operand's class is an SGPR, we can never move it. + for (unsigned i = 0; i < 3; ++i) { + int Idx = VOP3Idx[i]; + if (Idx == -1) + break; + + if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) { + SGPRReg = MI->getOperand(Idx).getReg(); + break; + } + } } for (unsigned i = 0; i < 3; ++i) { diff --git a/test/CodeGen/R600/fneg.f64.ll b/test/CodeGen/R600/fneg.f64.ll index 61d95135a4a..f0b341b7a6b 100644 --- a/test/CodeGen/R600/fneg.f64.ll +++ b/test/CodeGen/R600/fneg.f64.ll @@ -50,7 +50,7 @@ define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) { ; SI-LABEL: @fneg_fold ; SI: S_LOAD_DWORDX2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; SI-NOT: XOR -; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], {{v\[[0-9]+:[0-9]+\]}} +; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]] define void @fneg_fold_f64(double addrspace(1)* %out, double %in) { %fsub = fsub double -0.0, %in %fmul = fmul double %fsub, %in diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll index 72cd15c7d70..8631301b49c 100644 --- 
a/test/CodeGen/R600/fneg.ll +++ b/test/CodeGen/R600/fneg.ll @@ -59,7 +59,7 @@ define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) { ; FUNC-LABEL: @fneg_fold ; SI: S_LOAD_DWORD [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb ; SI-NOT: XOR -; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], v{{[0-9]+}} +; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]] define void @fneg_fold_f32(float addrspace(1)* %out, float %in) { %fsub = fsub float -0.0, %in %fmul = fmul float %fsub, %in diff --git a/test/CodeGen/R600/rotl.ll b/test/CodeGen/R600/rotl.ll index 8c86fb5aeab..a9dee8ca78c 100644 --- a/test/CodeGen/R600/rotl.ll +++ b/test/CodeGen/R600/rotl.ll @@ -8,7 +8,7 @@ ; SI: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}} ; SI: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]] -; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]] +; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, s[0-9]+}}, [[VDST]] define void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) { entry: %0 = shl i32 %x, %y diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll index cc942c10a91..eb1176f5855 100644 --- a/test/CodeGen/R600/seto.ll +++ b/test/CodeGen/R600/seto.ll @@ -1,8 +1,8 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s - -;CHECK-LABEL: @main -;CHECK: V_CMP_O_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}} +; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; CHECK-LABEL: @main +; CHECK: V_CMP_O_F32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]] +; CHECK-NEXT: V_CNDMASK_B32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]] define void @main(float %p) { main_body: %c = fcmp oeq float %p, %p diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll index 33007fc754b..a78e8e6b45b 100644 --- a/test/CodeGen/R600/setuo.ll +++ b/test/CodeGen/R600/setuo.ll @@ -1,8 +1,8 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s - -;CHECK-LABEL: @main -;CHECK: V_CMP_U_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}} +; 
RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; CHECK-LABEL: @main +; CHECK: V_CMP_U_F32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]] +; CHECK-NEXT: V_CNDMASK_B32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]] define void @main(float %p) { main_body: %c = fcmp une float %p, %p