R600/SI: fix and cleanup SI register definition v2

Prevent TableGen from producing really strange code by using
proper register sizes, alignments and hierarchy.

Also clean up the unused definitions and add some comments.

v2: add SGPR 512 bit registers, stop registers from wrapping around,
    fix SGPR alignment

This is a candidate for the mesa-stable branch.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176098 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Christian Konig 2013-02-26 17:52:03 +00:00
parent a25b8d4c0b
commit 6fd49bc89a
2 changed files with 135 additions and 97 deletions

View File

@ -918,14 +918,15 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
def S_CSELECT_B32 : SOP2 < def S_CSELECT_B32 : SOP2 <
0x0000000a, (outs SReg_32:$dst), 0x0000000a, (outs SReg_32:$dst),
(ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32", (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
[(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))] [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc),
SReg_32:$src0, SReg_32:$src1))]
>; >;
def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>; def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
// f32 pattern for S_CSELECT_B32 // f32 pattern for S_CSELECT_B32
def : Pat < def : Pat <
(f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)), (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)),
(S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc) (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
>; >;

View File

@ -1,30 +1,40 @@
//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Declarations that describe the SI registers
//===----------------------------------------------------------------------===//
class SIReg <string n, bits<16> encoding = 0> : Register<n> { class SIReg <string n, bits<16> encoding = 0> : Register<n> {
let Namespace = "AMDGPU"; let Namespace = "AMDGPU";
let HWEncoding = encoding; let HWEncoding = encoding;
} }
class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = encoding;
}
class SGPR_32 <bits<16> num, string name> : SIReg<name, num>;
class VGPR_32 <bits<16> num, string name> : SIReg<name, num> {
let HWEncoding{8} = 1;
}
// Special Registers // Special Registers
def VCC : SIReg<"VCC", 106>; def VCC : SIReg<"VCC", 106>;
def EXEC_LO : SIReg <"EXEC LO", 126>; def EXEC : SIReg<"EXEC", 126>;
def EXEC_HI : SIReg <"EXEC HI", 127>;
def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>;
def SCC : SIReg<"SCC", 253>; def SCC : SIReg<"SCC", 253>;
def M0 : SIReg <"M0", 124>; def M0 : SIReg <"M0", 124>;
//Interpolation registers // SGPR registers
// Defines SGPR0 through SGPR101, one SIReg per index. The loop index is
// passed to SIReg as the encoding argument, which SIReg stores in HWEncoding.
foreach Index = 0-101 in {
def SGPR#Index : SIReg <"SGPR"#Index, Index>;
}
// VGPR registers
// Defines VGPR0 through VGPR255 the same way, and additionally forces bit 8
// of HWEncoding to 1 on every VGPR, so a VGPR never shares an encoding value
// with the SGPR of the same index.
foreach Index = 0-255 in {
def VGPR#Index : SIReg <"VGPR"#Index, Index> {
let HWEncoding{8} = 1;
}
}
// virtual Interpolation registers
def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">; def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">; def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">; def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
@ -50,102 +60,150 @@ def ANCILLARY : SIReg <"ANCILLARY">;
def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">; def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
def POS_FIXED_PT : SIReg <"POS_FIXED_PT">; def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
// SGPR 32-bit registers //===----------------------------------------------------------------------===//
foreach Index = 0-101 in { // Groupings using register classes and tuples
def SGPR#Index : SGPR_32 <Index, "SGPR"#Index>; //===----------------------------------------------------------------------===//
}
// SGPR 32-bit registers
def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "SGPR%u", 0, 101))>; (add (sequence "SGPR%u", 0, 101))>;
// SGPR 64-bit registers // SGPR 64-bit registers
def SGPR_64 : RegisterTuples<[sub0, sub1], def SGPR_64 : RegisterTuples<[sub0, sub1],
[(add (decimate SGPR_32, 2)), [(add (decimate (trunc SGPR_32, 101), 2)),
(add(decimate (rotl SGPR_32, 1), 2))]>; (add (decimate (shl SGPR_32, 1), 2))]>;
// SGPR 128-bit registers // SGPR 128-bit registers
def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
[(add (decimate SGPR_32, 4)), [(add (decimate (trunc SGPR_32, 99), 4)),
(add (decimate (rotl SGPR_32, 1), 4)), (add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (rotl SGPR_32, 2), 4)), (add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (rotl SGPR_32, 3), 4))]>; (add (decimate (shl SGPR_32, 3), 4))]>;
// SGPR 256-bit registers // SGPR 256-bit registers
def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
[(add (decimate SGPR_32, 8)), [(add (decimate (trunc SGPR_32, 95), 4)),
(add (decimate (rotl SGPR_32, 1), 8)), (add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (rotl SGPR_32, 2), 8)), (add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (rotl SGPR_32, 3), 8)), (add (decimate (shl SGPR_32, 3), 4)),
(add (decimate (rotl SGPR_32, 4), 8)), (add (decimate (shl SGPR_32, 4), 4)),
(add (decimate (rotl SGPR_32, 5), 8)), (add (decimate (shl SGPR_32, 5), 4)),
(add (decimate (rotl SGPR_32, 6), 8)), (add (decimate (shl SGPR_32, 6), 4)),
(add (decimate (rotl SGPR_32, 7), 8))]>; (add (decimate (shl SGPR_32, 7), 4))]>;
// SGPR 512-bit registers
// Each tuple groups sixteen consecutive SGPRs as sub0..sub15.
// The sub0 list keeps only the first 87 members of SGPR_32 (SGPR0-SGPR86) and
// then takes every 4th one, so tuples start at SGPR0, SGPR4, ..., SGPR84.
// The list for subN removes the first N members of SGPR_32 with shl and takes
// every 4th of the rest, i.e. it starts at SGPR<N> and never wraps back to
// SGPR0. With the lists zipped element by element, the last tuple is
// SGPR84-SGPR99.
def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
[(add (decimate (trunc SGPR_32, 87), 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (shl SGPR_32, 3), 4)),
(add (decimate (shl SGPR_32, 4), 4)),
(add (decimate (shl SGPR_32, 5), 4)),
(add (decimate (shl SGPR_32, 6), 4)),
(add (decimate (shl SGPR_32, 7), 4)),
(add (decimate (shl SGPR_32, 8), 4)),
(add (decimate (shl SGPR_32, 9), 4)),
(add (decimate (shl SGPR_32, 10), 4)),
(add (decimate (shl SGPR_32, 11), 4)),
(add (decimate (shl SGPR_32, 12), 4)),
(add (decimate (shl SGPR_32, 13), 4)),
(add (decimate (shl SGPR_32, 14), 4)),
(add (decimate (shl SGPR_32, 15), 4))]>;
// VGPR 32-bit registers // VGPR 32-bit registers
foreach Index = 0-255 in {
def VGPR#Index : VGPR_32 <Index, "VGPR"#Index>;
}
def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "VGPR%u", 0, 255))>; (add (sequence "VGPR%u", 0, 255))>;
// VGPR 64-bit registers // VGPR 64-bit registers
def VGPR_64 : RegisterTuples<[sub0, sub1], def VGPR_64 : RegisterTuples<[sub0, sub1],
[(add VGPR_32), [(add (trunc VGPR_32, 255)),
(add (rotl VGPR_32, 1))]>; (add (shl VGPR_32, 1))]>;
// VGPR 128-bit registers // VGPR 128-bit registers
def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
[(add VGPR_32), [(add (trunc VGPR_32, 253)),
(add (rotl VGPR_32, 1)), (add (shl VGPR_32, 1)),
(add (rotl VGPR_32, 2)), (add (shl VGPR_32, 2)),
(add (rotl VGPR_32, 3))]>; (add (shl VGPR_32, 3))]>;
// VGPR 256-bit registers // VGPR 256-bit registers
def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
[(add VGPR_32), [(add (trunc VGPR_32, 249)),
(add (rotl VGPR_32, 1)), (add (shl VGPR_32, 1)),
(add (rotl VGPR_32, 2)), (add (shl VGPR_32, 2)),
(add (rotl VGPR_32, 3)), (add (shl VGPR_32, 3)),
(add (rotl VGPR_32, 4)), (add (shl VGPR_32, 4)),
(add (rotl VGPR_32, 5)), (add (shl VGPR_32, 5)),
(add (rotl VGPR_32, 6)), (add (shl VGPR_32, 6)),
(add (rotl VGPR_32, 7))]>; (add (shl VGPR_32, 7))]>;
// VGPR 512-bit registers // VGPR 512-bit registers
def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15], sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
[(add VGPR_32), [(add (trunc VGPR_32, 241)),
(add (rotl VGPR_32, 1)), (add (shl VGPR_32, 1)),
(add (rotl VGPR_32, 2)), (add (shl VGPR_32, 2)),
(add (rotl VGPR_32, 3)), (add (shl VGPR_32, 3)),
(add (rotl VGPR_32, 4)), (add (shl VGPR_32, 4)),
(add (rotl VGPR_32, 5)), (add (shl VGPR_32, 5)),
(add (rotl VGPR_32, 6)), (add (shl VGPR_32, 6)),
(add (rotl VGPR_32, 7)), (add (shl VGPR_32, 7)),
(add (rotl VGPR_32, 8)), (add (shl VGPR_32, 8)),
(add (rotl VGPR_32, 9)), (add (shl VGPR_32, 9)),
(add (rotl VGPR_32, 10)), (add (shl VGPR_32, 10)),
(add (rotl VGPR_32, 11)), (add (shl VGPR_32, 11)),
(add (rotl VGPR_32, 12)), (add (shl VGPR_32, 12)),
(add (rotl VGPR_32, 13)), (add (shl VGPR_32, 13)),
(add (rotl VGPR_32, 14)), (add (shl VGPR_32, 14)),
(add (rotl VGPR_32, 15))]>; (add (shl VGPR_32, 15))]>;
//===----------------------------------------------------------------------===//
// Register classes used as source and destination
//===----------------------------------------------------------------------===//
// Special register classes for predicates and the M0 register
// Each class below contains exactly one of the special registers defined
// near the top of this file.
// SCCReg: only SCC; value types i32 and i1, alignment 32.
def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>;
// VCCReg: only VCC; value types i64 and i1, alignment 64.
def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
// EXECReg: only EXEC; value types i64 and i1, alignment 64.
def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
// M0Reg: only M0; value type i32, alignment 32.
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
// Register class for all scalar registers (SGPRs + Special Registers) // Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add SGPR_32, M0, EXEC_LO, EXEC_HI) (add SGPR_32, M0Reg)
>; >;
def SReg_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SGPR_64, VCC, EXEC)>; def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
(add SGPR_64, VCCReg, EXECReg)
>;
def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>; def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>;
def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>; def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>;
def SReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add SGPR_512)>;
// Register class for all vector registers (VGPRs + Interpolation Registers) // Register class for all vector registers (VGPRs + Interpolation Registers)
def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>;
(add VGPR_32,
// Vector register classes for the wider sizes. Each class contains the VGPR
// tuple set of the matching width (VGPR_64 ... VGPR_512) and uses that width
// as its alignment argument.
// VReg_64: VGPR pairs; value types i64 and v2i32.
def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>;
// VReg_128: VGPR quads; value types v4f32 and v4i32.
def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
// VReg_256: eight-VGPR tuples; value type v8i32.
def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
// VReg_512: sixteen-VGPR tuples; value type v16i32.
def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
//===----------------------------------------------------------------------===//
// [SV]Src_* register classes, can have either an immediate or a register
//===----------------------------------------------------------------------===//
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>;
def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add VReg_32, SReg_32,
PERSP_SAMPLE_I, PERSP_SAMPLE_J, PERSP_SAMPLE_I, PERSP_SAMPLE_J,
PERSP_CENTER_I, PERSP_CENTER_J, PERSP_CENTER_I, PERSP_CENTER_J,
PERSP_CENTROID_I, PERSP_CENTROID_J, PERSP_CENTROID_I, PERSP_CENTROID_J,
@ -162,29 +220,8 @@ def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32,
ANCILLARY, ANCILLARY,
SAMPLE_COVERAGE, SAMPLE_COVERAGE,
POS_FIXED_PT POS_FIXED_PT
) )
>; >;
def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>; def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>;
def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
// [SV]Src_* operands can have either an immediate or an register
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
def SSrc_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SReg_64)>;
def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add SReg_64, VReg_64)>;
// Special register classes for predicates and the M0 register
def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;