From 36ba9091843bd1205fe3499ba4b55bbedc6583c9 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 Feb 2013 17:02:09 +0000 Subject: [PATCH] R600/SI: Add basic support for more integer vector types. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v1i32, v2i32, v8i32 and v16i32. Only add VGPR register classes for integer vector types, to avoid attempts copying from VGPR to SGPR registers, which is not possible. Patch By: Michel Dänzer Signed-off-by: Michel Dänzer Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174632 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUAsmPrinter.cpp | 6 ++++ lib/Target/R600/AMDGPUInstructions.td | 51 +++++++++++++++++++++++++++ lib/Target/R600/SIISelLowering.cpp | 7 ++-- lib/Target/R600/SIInstructions.td | 16 +++++---- lib/Target/R600/SIRegisterInfo.td | 41 +++++++++++++++++++-- 5 files changed, 110 insertions(+), 11 deletions(-) diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index 715a37822c4..082e7345d4f 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -118,6 +118,12 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { } else if (AMDGPU::SReg_256RegClass.contains(reg)) { isSGPR = true; width = 8; + } else if (AMDGPU::VReg_256RegClass.contains(reg)) { + isSGPR = false; + width = 8; + } else if (AMDGPU::VReg_512RegClass.contains(reg)) { + isSGPR = false; + width = 16; } else { assert(!"Unknown register class"); } diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 6765bc8da5a..0559a5ade51 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -191,6 +191,19 @@ class Insert_Element ; // Vector Build pattern +class Vector1_Build : Pat < + (vecType (build_vector (elemType elemClass:$src))), + (vecType elemClass:$src) +>; + +class Vector2_Build : Pat < + (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1))), + (INSERT_SUBREG (INSERT_SUBREG + (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1) +>; + class Vector_Build : Pat < (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y), @@ -200,6 +213,44 @@ class Vector_Build ; +class Vector8_Build : Pat < + (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1), + (elemType elemClass:$sub2), (elemType elemClass:$sub3), + (elemType elemClass:$sub4), (elemType elemClass:$sub5), + (elemType elemClass:$sub6), (elemType elemClass:$sub7))), + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1), + elemClass:$sub2, sub2), elemClass:$sub3, sub3), + elemClass:$sub4, sub4), elemClass:$sub5, sub5), + elemClass:$sub6, sub6), elemClass:$sub7, sub7) +>; + +class Vector16_Build : Pat < + (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1), + (elemType elemClass:$sub2), (elemType elemClass:$sub3), + (elemType elemClass:$sub4), (elemType elemClass:$sub5), + (elemType elemClass:$sub6), (elemType elemClass:$sub7), + (elemType elemClass:$sub8), (elemType elemClass:$sub9), + (elemType elemClass:$sub10), (elemType elemClass:$sub11), + (elemType elemClass:$sub12), (elemType elemClass:$sub13), + (elemType elemClass:$sub14), (elemType elemClass:$sub15))), + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1), + elemClass:$sub2, sub2), elemClass:$sub3, sub3), + elemClass:$sub4, sub4), elemClass:$sub5, sub5), + elemClass:$sub6, sub6), elemClass:$sub7, sub7), + elemClass:$sub8, sub8), elemClass:$sub9, sub9), + elemClass:$sub10, sub10), elemClass:$sub11, sub11), + elemClass:$sub12, sub12), elemClass:$sub13, sub13), + elemClass:$sub14, sub14), elemClass:$sub15, sub15) +>; + // bitconvert pattern class BitConvert : Pat < (dt (bitconvert (st rc:$src0))), diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index ef9d17c2dc5..afafa8ceeca 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -34,8 +34,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass); addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass); - addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); - addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass); + addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass); + addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass); + addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass); + addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass); computeRegisterProperties(); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index d33e1139d26..dd779cfbe1a 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -584,7 +584,7 @@ defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32", //defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>; //defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", - [(set VReg_32:$dst, (fp_to_sint AllReg_32:$src0))] + [(set (i32 VReg_32:$dst), (fp_to_sint AllReg_32:$src0))] >; defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; ////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>; @@ -1000,17 +1000,17 @@ def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>; //def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>; def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>; -class V_MOV_IMM : InstSI < +class V_MOV_IMM : InstSI < (outs VReg_32:$dst), (ins immType:$src0), "V_MOV_IMM", - [(set VReg_32:$dst, (immNode:$src0))] + [(set VReg_32:$dst, (type immNode:$src0))] >; let isCodeGenOnly = 1, isPseudo = 1 in { -def V_MOV_IMM_I32 : V_MOV_IMM; -def V_MOV_IMM_F32 : V_MOV_IMM; +def V_MOV_IMM_I32 : V_MOV_IMM; +def V_MOV_IMM_F32 : V_MOV_IMM; def S_MOV_IMM_I32 : InstSI < (outs SReg_32:$dst), @@ -1227,8 +1227,12 @@ def : Insert_Element ; def : Insert_Element ; def : Insert_Element ; +def : Vector1_Build ; +def : Vector2_Build ; def : Vector_Build ; -def : Vector_Build ; +def : Vector_Build ; +def : Vector8_Build ; +def : Vector16_Build ; def : BitConvert ; def : BitConvert ; diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index a1c7a866fd4..9b483eb8578 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -101,6 +101,37 @@ def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], (add (rotl VGPR_32, 2)), (add (rotl VGPR_32, 3))]>; +// VGPR 256-bit registers +def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], + [(add VGPR_32), + (add (rotl VGPR_32, 1)), + (add (rotl VGPR_32, 2)), + (add (rotl VGPR_32, 3)), + (add (rotl VGPR_32, 4)), + (add (rotl VGPR_32, 5)), + (add (rotl VGPR_32, 6)), + (add (rotl VGPR_32, 7))]>; + +// VGPR 512-bit registers +def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, + sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15], + [(add VGPR_32), + (add (rotl VGPR_32, 1)), + (add (rotl VGPR_32, 2)), + (add (rotl VGPR_32, 3)), + (add (rotl VGPR_32, 4)), + (add (rotl VGPR_32, 5)), + (add (rotl VGPR_32, 6)), + (add (rotl VGPR_32, 7)), + (add (rotl VGPR_32, 8)), + (add (rotl VGPR_32, 9)), + (add (rotl VGPR_32, 10)), + (add (rotl VGPR_32, 11)), + (add (rotl VGPR_32, 12)), + (add (rotl VGPR_32, 13)), + (add (rotl VGPR_32, 14)), + (add (rotl VGPR_32, 15))]>; + // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add SGPR_32, SREG_LIT_0, M0, EXEC_LO, EXEC_HI) @@ -115,7 +146,7 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>; // Register class for all vector registers (VGPRs + Interploation Registers) -def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, +def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32, PERSP_SAMPLE_I, PERSP_SAMPLE_J, PERSP_CENTER_I, PERSP_CENTER_J, @@ -136,9 +167,13 @@ def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, ) >; -def VReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add VGPR_64)>; +def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>; -def VReg_128 : RegisterClass<"AMDGPU", [v4f32], 128, (add VGPR_128)>; +def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>; + +def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>; + +def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; // AllReg_* - A set of all scalar and vector registers of a given width. def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add VReg_32, SReg_32)>;