diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 0be0d034371..a53e0b9ae37 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -673,43 +673,67 @@ bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate, return false; } +const TargetRegisterClass *SITargetLowering::getRegClassForNode( + SelectionDAG &DAG, const SDValue &Op) const { + const SIInstrInfo *TII = + static_cast(getTargetMachine().getInstrInfo()); + const SIRegisterInfo &TRI = TII->getRegisterInfo(); + + if (!Op->isMachineOpcode()) { + switch(Op->getOpcode()) { + case ISD::CopyFromReg: { + MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + unsigned Reg = cast(Op->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + return MRI.getRegClass(Reg); + } + return TRI.getPhysRegClass(Reg); + } + default: return NULL; + } + } + const MCInstrDesc &Desc = TII->get(Op->getMachineOpcode()); + int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass; + if (OpClassID != -1) { + return TRI.getRegClass(OpClassID); + } + switch(Op.getMachineOpcode()) { + case AMDGPU::COPY_TO_REGCLASS: + // Operand 1 is the register class id for COPY_TO_REGCLASS instructions. + OpClassID = cast(Op->getOperand(1))->getZExtValue(); + + // If the COPY_TO_REGCLASS instruction is copying to a VSrc register + // class, then the register class for the value could be either a + // VReg or and SReg. In order to get a more accurate + if (OpClassID == AMDGPU::VSrc_32RegClassID || + OpClassID == AMDGPU::VSrc_64RegClassID) { + return getRegClassForNode(DAG, Op.getOperand(0)); + } + return TRI.getRegClass(OpClassID); + case AMDGPU::EXTRACT_SUBREG: { + int SubIdx = cast(Op.getOperand(1))->getZExtValue(); + const TargetRegisterClass *SuperClass = + getRegClassForNode(DAG, Op.getOperand(0)); + return TRI.getSubClassWithSubReg(SuperClass, SubIdx); + } + case AMDGPU::REG_SEQUENCE: + // Operand 0 is the register class id for REG_SEQUENCE instructions. + return TRI.getRegClass( + cast(Op.getOperand(0))->getZExtValue()); + default: + return getRegClassFor(Op.getSimpleValueType()); + } +} + /// \brief Does "Op" fit into register class "RegClass" ? bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op, unsigned RegClass) const { - - MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); - SDNode *Node = Op.getNode(); - - const TargetRegisterClass *OpClass; const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - if (MachineSDNode *MN = dyn_cast(Node)) { - const SIInstrInfo *TII = - static_cast(getTargetMachine().getInstrInfo()); - const MCInstrDesc &Desc = TII->get(MN->getMachineOpcode()); - int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass; - if (OpClassID == -1) { - switch (MN->getMachineOpcode()) { - case AMDGPU::REG_SEQUENCE: - // Operand 0 is the register class id for REG_SEQUENCE instructions. - OpClass = TRI->getRegClass( - cast(MN->getOperand(0))->getZExtValue()); - break; - default: - OpClass = getRegClassFor(Op.getSimpleValueType()); - break; - } - } else { - OpClass = TRI->getRegClass(OpClassID); - } - - } else if (Node->getOpcode() == ISD::CopyFromReg) { - RegisterSDNode *Reg = cast(Node->getOperand(1).getNode()); - OpClass = MRI.getRegClass(Reg->getReg()); - - } else + const TargetRegisterClass *RC = getRegClassForNode(DAG, Op); + if (!RC) { return false; - - return TRI->getRegClass(RegClass)->hasSubClassEq(OpClass); + } + return TRI->getRegClass(RegClass)->hasSubClassEq(RC); } /// \brief Make sure that we don't exeed the number of allowed scalars diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 08c1d175a59..b4202c475d4 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -30,6 +30,8 @@ class SITargetLowering : public AMDGPUTargetLowering { bool foldImm(SDValue &Operand, int32_t &Immediate, bool &ScalarSlotUsed) const; + const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG, + const SDValue &Op) const; bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op, unsigned RegClass) const; void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand, diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index ddfc54e7b03..50fd4c7ed56 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -49,3 +49,24 @@ const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass( case MVT::i32: return &AMDGPU::VReg_32RegClass; } } + +const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { + assert(!TargetRegisterInfo::isVirtualRegister(Reg)); + + const TargetRegisterClass *BaseClasses[] = { + &AMDGPU::VReg_32RegClass, + &AMDGPU::SReg_32RegClass, + &AMDGPU::VReg_64RegClass, + &AMDGPU::SReg_64RegClass, + &AMDGPU::SReg_128RegClass, + &AMDGPU::SReg_256RegClass + }; + + for (unsigned i = 0, e = sizeof(BaseClasses) / + sizeof(const TargetRegisterClass*); i != e; ++i) { + if (BaseClasses[i]->contains(Reg)) { + return BaseClasses[i]; + } + } + return NULL; +} diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index c322f94914c..d0df4f9de60 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ -41,6 +41,10 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { /// \brief get the register class of the specified type to use in the /// CFGStructurizer virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; + + /// \brief Return the 'base' register class for this register. + /// e.g. SGPR0 => SReg_32, VGPR => VReg_32 SGPR0_SGPR1 -> SReg_32, etc. + const TargetRegisterClass *getPhysRegClass(unsigned Reg) const; }; } // End namespace llvm diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll index 501c5567fff..cdccdfaad8f 100644 --- a/test/CodeGen/R600/bfi_int.ll +++ b/test/CodeGen/R600/bfi_int.ll @@ -38,7 +38,7 @@ entry: ; R600-CHECK: @bfi_sha256_ma ; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W ; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W -; SI-CHECK: V_XOR_B32_e64 [[DST:VGPR[0-9]+]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}} +; SI-CHECK: V_XOR_B32_e64 [[DST:VGPR[0-9]+]], {{[SV]GPR[0-9]+, VGPR[0-9]+}} ; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}} define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) { diff --git a/test/CodeGen/R600/llvm.SI.imageload.ll b/test/CodeGen/R600/llvm.SI.imageload.ll index 6b321f03622..0adcdfc4097 100644 --- a/test/CodeGen/R600/llvm.SI.imageload.ll +++ b/test/CodeGen/R600/llvm.SI.imageload.ll @@ -1,15 +1,15 @@ ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s -;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15, 0, 0, -1 -;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 3, 0, 0, 0 -;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 2, 0, 0, 0 -;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 1, 0, 0, 0 -;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 4, 0, 0, 0 -;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 8, 0, 0, 0 -;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 5, 0, 0, 0 -;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 12, 0, 0, -1 -;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7, 0, 0, 0 -;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 8, 0, 0, -1 +;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15, 0, 0, -1 +;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 3, 0, 0, 0 +;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 2, 0, 0, 0 +;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 1, 0, 0, 0 +;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 4, 0, 0, 0 +;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 8, 0, 0, 0 +;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 5, 0, 0, 0 +;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 12, 0, 0, -1 +;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7, 0, 0, 0 +;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 8, 0, 0, -1 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0 diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll index de06354a564..76559961e65 100644 --- a/test/CodeGen/R600/llvm.SI.sample.ll +++ b/test/CodeGen/R600/llvm.SI.sample.ll @@ -1,21 +1,21 @@ ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 3 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 2 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 1 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 4 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8 -;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 5 -;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 9 -;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 6 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 10 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 12 -;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7 -;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11 -;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8 +;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15 +;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 3 +;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 2 +;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 1 +;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 4 +;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8 +;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 5 +;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 9 +;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 6 +;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 10 +;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 12 +;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7 +;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11 +;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13 +;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14 +;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0 diff --git a/test/CodeGen/R600/llvm.SI.sampled.ll b/test/CodeGen/R600/llvm.SI.sampled.ll index 71b8ef5beaa..3b05551372c 100644 --- a/test/CodeGen/R600/llvm.SI.sampled.ll +++ b/test/CodeGen/R600/llvm.SI.sampled.ll @@ -1,21 +1,21 @@ ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s -;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15 -;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 3 -;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 2 -;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 1 -;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 4 -;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 8 -;CHECK: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 5 -;CHECK: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 9 -;CHECK: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 6 -;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 10 -;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 12 -;CHECK: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7 -;CHECK: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11 -;CHECK: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13 -;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14 -;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 8 +;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15 +;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 3 +;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 2 +;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 1 +;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 4 +;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 8 +;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 5 +;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 9 +;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 6 +;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 10 +;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 12 +;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7 +;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11 +;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13 +;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14 +;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 8 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0 diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll index 9e29b0d003d..806e681ba3d 100644 --- a/test/CodeGen/R600/lshl.ll +++ b/test/CodeGen/R600/lshl.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s -;CHECK: V_LSHL_B32_e64 VGPR{{[0-9]+}}, {{[SV]GPR[0-9]+}}, 1 +;CHECK: V_LSHL_B32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 1 define void @test(i32 %p) { %i = mul i32 %p, 2 diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll index eab3fbf6007..cfbcc3484d4 100644 --- a/test/CodeGen/R600/lshr.ll +++ b/test/CodeGen/R600/lshr.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s -;CHECK: V_LSHR_B32_e64 {{VGPR[0-9]+}}, {{[SV]GPR[0-9]+}}, 1 +;CHECK: V_LSHR_B32_e64 {{VGPR[0-9]}}, SGPR{{[0-9]}}, 1 define void @test(i32 %p) { %i = udiv i32 %p, 2