From df4626ef15ba0eb5f571a3ee6314e5c388258927 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 14 Aug 2013 23:24:17 +0000 Subject: [PATCH] R600/SI: Assign a register class to the $vaddr operand for MIMG instructions The previous code declared the operand as unknown:$vaddr, which made it possible for scalar registers to be used instead of vector registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188425 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIDefines.h | 6 ++ lib/Target/R600/SIISelLowering.cpp | 8 +- lib/Target/R600/SIInstrFormats.td | 3 + lib/Target/R600/SIInstrInfo.cpp | 5 ++ lib/Target/R600/SIInstrInfo.h | 2 +- lib/Target/R600/SIInstrInfo.td | 33 +++++--- lib/Target/R600/SIInstructions.td | 106 +++++++++++++++---------- test/CodeGen/R600/llvm.SI.imageload.ll | 44 ++++++++++ 8 files changed, 150 insertions(+), 57 deletions(-) diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h index 147578ce8d2..572ed6ae16a 100644 --- a/lib/Target/R600/SIDefines.h +++ b/lib/Target/R600/SIDefines.h @@ -11,6 +11,12 @@ #ifndef SIDEFINES_H_ #define SIDEFINES_H_ +namespace SIInstrFlags { +enum { + MIMG = 1 << 3 +}; +} + #define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028 #define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C #define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8) diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index a76e6ee3145..4631f8a3a8b 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -1022,9 +1022,11 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node, /// \brief Fold the instructions after slecting them SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, SelectionDAG &DAG) const { + const SIInstrInfo *TII = + static_cast(getTargetMachine().getInstrInfo()); Node = AdjustRegClass(Node, DAG); - if (AMDGPU::isMIMG(Node->getMachineOpcode()) != -1) + if (TII->isMIMG(Node->getMachineOpcode())) adjustWritemask(Node, DAG); return foldOperands(Node, DAG); @@ -1034,7 +1036,9 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, /// bits set in the writemask void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - if (AMDGPU::isMIMG(MI->getOpcode()) == -1) + const SIInstrInfo *TII = + static_cast(getTargetMachine().getInstrInfo()); + if (!TII->isMIMG(MI->getOpcode())) return; unsigned VReg = MI->getOperand(0).getReg(); diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 434aa7e0bf0..cd1bbcd670e 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -17,10 +17,12 @@ class InstSI pattern> : field bits<1> VM_CNT = 0; field bits<1> EXP_CNT = 0; field bits<1> LGKM_CNT = 0; + field bits<1> MIMG = 0; let TSFlags{0} = VM_CNT; let TSFlags{1} = EXP_CNT; let TSFlags{2} = LGKM_CNT; + let TSFlags{3} = MIMG; } class Enc32 pattern> : @@ -414,6 +416,7 @@ class MIMG op, dag outs, dag ins, string asm, list pattern> : let VM_CNT = 1; let EXP_CNT = 1; + let MIMG = 1; } def EXP : Enc64< diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 551ae8683ed..9bb4ad9abc5 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -15,6 +15,7 @@ #include "SIInstrInfo.h" #include "AMDGPUTargetMachine.h" +#include "SIDefines.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrDesc.h" @@ -224,6 +225,10 @@ SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { return RC != &AMDGPU::EXECRegRegClass; } +int SIInstrInfo::isMIMG(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::MIMG; +} + //===----------------------------------------------------------------------===// // Indirect addressing callbacks //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 87eff4d6c95..8d24ab4bf14 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -47,6 +47,7 @@ public: virtual bool isMov(unsigned Opcode) const; virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; + int isMIMG(uint16_t Opcode) const; virtual int getIndirectIndexBegin(const MachineFunction &MF) const; @@ -80,7 +81,6 @@ namespace AMDGPU { int getVOPe64(uint16_t Opcode); int getCommuteRev(uint16_t Opcode); int getCommuteOrig(uint16_t Opcode); - int isMIMG(uint16_t Opcode); } // End namespace AMDGPU diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 302fa2475cf..71d20eaa0fb 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -397,11 +397,12 @@ class MTBUF_Load_Helper op, string asm, RegisterClass regClass> : MTBUF let mayStore = 0; } -class MIMG_NoSampler_Helper op, string asm> : MIMG < +class MIMG_NoSampler_Helper op, string asm, + RegisterClass src_rc> : MIMG < op, (outs VReg_128:$vdata), (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, - i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr, SReg_256:$srsrc), asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," #" $tfe, $lwe, $slc, $vaddr, $srsrc", @@ -412,11 +413,18 @@ class MIMG_NoSampler_Helper op, string asm> : MIMG < let hasPostISelHook = 1; } -class MIMG_Sampler_Helper op, string asm> : MIMG < +multiclass MIMG_NoSampler op, string asm> { + def _V1 : MIMG_NoSampler_Helper ; + def _V2 : MIMG_NoSampler_Helper ; + def _V4 : MIMG_NoSampler_Helper ; +} + +class MIMG_Sampler_Helper op, string asm, + RegisterClass src_rc> : MIMG < op, (outs VReg_128:$vdata), (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, - i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp), asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp", @@ -426,6 +434,14 @@ class MIMG_Sampler_Helper op, string asm> : MIMG < let hasPostISelHook = 1; } +multiclass MIMG_Sampler op, string asm> { + def _V1 : MIMG_Sampler_Helper ; + def _V2 : MIMG_Sampler_Helper ; + def _V4 : MIMG_Sampler_Helper ; + def _V8 : MIMG_Sampler_Helper ; + def _V16 : MIMG_Sampler_Helper ; +} + //===----------------------------------------------------------------------===// // Vector instruction mappings //===----------------------------------------------------------------------===// @@ -457,13 +473,4 @@ def getCommuteOrig : InstrMapping { let ValueCols = [["1"]]; } -// Test if the supplied opcode is an MIMG instruction -def isMIMG : InstrMapping { - let FilterClass = "MIMG"; - let RowFields = ["Inst"]; - let ColFields = ["Size"]; - let KeyCol = ["8"]; - let ValueCols = [["8"]]; -} - include "SIInstructions.td" diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 909c9762369..5fbd68f3e7c 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -500,8 +500,8 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; //def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; -def IMAGE_LOAD : MIMG_NoSampler_Helper <0x00000000, "IMAGE_LOAD">; -def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">; +defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "IMAGE_LOAD">; +defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "IMAGE_LOAD_MIP">; //def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>; //def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>; //def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>; @@ -510,7 +510,7 @@ def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">; //def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>; //def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>; //def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>; -def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">; +def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO", VReg_32>; //def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>; //def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>; //def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>; @@ -528,20 +528,20 @@ def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">; //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>; //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>; //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>; -def IMAGE_SAMPLE : MIMG_Sampler_Helper <0x00000020, "IMAGE_SAMPLE">; +defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "IMAGE_SAMPLE">; //def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>; -def IMAGE_SAMPLE_D : MIMG_Sampler_Helper <0x00000022, "IMAGE_SAMPLE_D">; +defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "IMAGE_SAMPLE_D">; //def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>; -def IMAGE_SAMPLE_L : MIMG_Sampler_Helper <0x00000024, "IMAGE_SAMPLE_L">; -def IMAGE_SAMPLE_B : MIMG_Sampler_Helper <0x00000025, "IMAGE_SAMPLE_B">; +defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "IMAGE_SAMPLE_L">; +defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "IMAGE_SAMPLE_B">; //def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>; //def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>; -def IMAGE_SAMPLE_C : MIMG_Sampler_Helper <0x00000028, "IMAGE_SAMPLE_C">; +defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "IMAGE_SAMPLE_C">; //def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>; -def IMAGE_SAMPLE_C_D : MIMG_Sampler_Helper <0x0000002a, "IMAGE_SAMPLE_C_D">; +defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "IMAGE_SAMPLE_C_D">; //def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>; -def IMAGE_SAMPLE_C_L : MIMG_Sampler_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">; -def IMAGE_SAMPLE_C_B : MIMG_Sampler_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">; +defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "IMAGE_SAMPLE_C_L">; +defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">; //def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>; //def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>; //def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>; @@ -1327,7 +1327,7 @@ def : Pat < /* int_SI_sample for simple 1D texture lookup */ def : Pat < (int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm), - (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) + (IMAGE_SAMPLE_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SamplePattern : Pat < @@ -1358,33 +1358,51 @@ class SampleShadowArrayPattern; /* int_SI_sample* for texture lookups consuming more address parameters */ -multiclass SamplePatterns { - def : SamplePattern ; - def : SampleRectPattern ; - def : SampleArrayPattern ; - def : SampleShadowPattern ; - def : SampleShadowArrayPattern ; +multiclass SamplePatterns { + def : SamplePattern ; + def : SampleRectPattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; - def : SamplePattern ; - def : SampleArrayPattern ; - def : SampleShadowPattern ; - def : SampleShadowArrayPattern ; + def : SamplePattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; - def : SamplePattern ; - def : SampleArrayPattern ; - def : SampleShadowPattern ; - def : SampleShadowArrayPattern ; + def : SamplePattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; - def : SamplePattern ; - def : SampleArrayPattern ; - def : SampleShadowPattern ; - def : SampleShadowArrayPattern ; + def : SamplePattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; } -defm : SamplePatterns; -defm : SamplePatterns; -defm : SamplePatterns; -defm : SamplePatterns; +defm : SamplePatterns; +defm : SamplePatterns; +defm : SamplePatterns; +defm : SamplePatterns; /* int_SI_imageload for texture fetches consuming varying address parameters */ class ImageLoadPattern : Pat < @@ -1407,15 +1425,21 @@ class ImageLoadArrayMSAAPattern; -multiclass ImageLoadPatterns { - def : ImageLoadPattern ; - def : ImageLoadArrayPattern ; - def : ImageLoadMSAAPattern ; - def : ImageLoadArrayMSAAPattern ; +multiclass ImageLoadPatterns { + def : ImageLoadPattern ; + def : ImageLoadArrayPattern ; } -defm : ImageLoadPatterns; -defm : ImageLoadPatterns; +multiclass ImageLoadMSAAPatterns { + def : ImageLoadMSAAPattern ; + def : ImageLoadArrayMSAAPattern ; +} + +defm : ImageLoadPatterns; +defm : ImageLoadPatterns; + +defm : ImageLoadMSAAPatterns; +defm : ImageLoadMSAAPatterns; /* Image resource information */ def : Pat < diff --git a/test/CodeGen/R600/llvm.SI.imageload.ll b/test/CodeGen/R600/llvm.SI.imageload.ll index 1ed4dd42ae6..3817034aa42 100644 --- a/test/CodeGen/R600/llvm.SI.imageload.ll +++ b/test/CodeGen/R600/llvm.SI.imageload.ll @@ -82,6 +82,50 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { ret void } +; Test that ccordinates are stored in vgprs and not sgprs +; CHECK: vgpr_coords +; CHECK: IMAGE_LOAD_MIP VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 15, 0, 0, 0, 0, 0, 0, 0, VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}} +define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { +main_body: + %20 = getelementptr float addrspace(2)* addrspace(2)* %0, i32 0 + %21 = load float addrspace(2)* addrspace(2)* %20, !tbaa !0 + %22 = getelementptr float addrspace(2)* %21, i32 0 + %23 = load float addrspace(2)* %22, !tbaa !0, !invariant.load !1 + %24 = getelementptr float addrspace(2)* %21, i32 1 + %25 = load float addrspace(2)* %24, !tbaa !0, !invariant.load !1 + %26 = getelementptr float addrspace(2)* %21, i32 4 + %27 = load float addrspace(2)* %26, !tbaa !0, !invariant.load !1 + %28 = getelementptr <32 x i8> addrspace(2)* %2, i32 0 + %29 = load <32 x i8> addrspace(2)* %28, !tbaa !0 + %30 = bitcast float %27 to i32 + %31 = bitcast float %23 to i32 + %32 = bitcast float %25 to i32 + %33 = insertelement <4 x i32> undef, i32 %31, i32 0 + %34 = insertelement <4 x i32> %33, i32 %32, i32 1 + %35 = insertelement <4 x i32> %34, i32 %30, i32 2 + %36 = insertelement <4 x i32> %35, i32 undef, i32 3 + %37 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %36, <32 x i8> %29, i32 2) + %38 = extractelement <4 x i32> %37, i32 0 + %39 = extractelement <4 x i32> %37, i32 1 + %40 = extractelement <4 x i32> %37, i32 2 + %41 = extractelement <4 x i32> %37, i32 3 + %42 = bitcast i32 %38 to float + %43 = bitcast i32 %39 to float + %44 = bitcast i32 %40 to float + %45 = bitcast i32 %41 to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %42, float %43, float %44, float %45) + ret void +} + declare <4 x i32> @llvm.SI.imageload.(<4 x i32>, <8 x i32>, i32) readnone +; Function Attrs: nounwind readnone +declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } + +!0 = metadata !{metadata !"const", null, i32 1} +!1 = metadata !{} +