R600/SI: add float vector types

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177276 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Christian Konig 2013-03-18 11:34:10 +00:00
parent b87082228b
commit 2d7f19e1e9
4 changed files with 82 additions and 21 deletions

View File

@ -202,8 +202,8 @@ class Vector2_Build <ValueType vecType, RegisterClass vectorClass,
(vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1)
>;
class Vector_Build <ValueType vecType, RegisterClass vectorClass,
ValueType elemType, RegisterClass elemClass> : Pat <
class Vector4_Build <ValueType vecType, RegisterClass vectorClass,
ValueType elemType, RegisterClass elemClass> : Pat <
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
(elemType elemClass:$z), (elemType elemClass:$w))),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG

View File

@ -1979,8 +1979,8 @@ def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
// bitconvert patterns

View File

@ -1257,22 +1257,83 @@ defm : SamplePatterns<VReg_128, v4i32>;
defm : SamplePatterns<VReg_256, v8i32>;
defm : SamplePatterns<VReg_512, v16i32>;
def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>;
def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>;
def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>;
def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>;
/********** ============================================ **********/
/********** Extraction, Insertion, Building and Casting **********/
/********** ============================================ **********/
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>;
foreach Index = 0-2 in {
def Extract_Element_v2i32_#Index : Extract_Element <
i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v2i32_#Index : Insert_Element <
i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
>;
def Extract_Element_v2f32_#Index : Extract_Element <
f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v2f32_#Index : Insert_Element <
f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
>;
}
foreach Index = 0-3 in {
def Extract_Element_v4i32_#Index : Extract_Element <
i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v4i32_#Index : Insert_Element <
i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
>;
def Extract_Element_v4f32_#Index : Extract_Element <
f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v4f32_#Index : Insert_Element <
f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
>;
}
foreach Index = 0-7 in {
def Extract_Element_v8i32_#Index : Extract_Element <
i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v8i32_#Index : Insert_Element <
i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
>;
def Extract_Element_v8f32_#Index : Extract_Element <
f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v8f32_#Index : Insert_Element <
f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
>;
}
foreach Index = 0-15 in {
def Extract_Element_v16i32_#Index : Extract_Element <
i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v16i32_#Index : Insert_Element <
i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
>;
def Extract_Element_v16f32_#Index : Extract_Element <
f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v16f32_#Index : Insert_Element <
f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
>;
}
def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>;
def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>;
def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
def : Vector_Build <v4i32, VReg_128, i32, VReg_32>;
def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>;
def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>;
def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>;
def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>;
def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>;
def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>;
def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>;
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <i32, f32, VReg_32>;

View File

@ -158,15 +158,15 @@ def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>;
// Register class for all vector registers (VGPRs + Interploation Registers)
def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>;
def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>;
def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
//===----------------------------------------------------------------------===//
// [SV]Src_* register classes, can have either an immediate or an register
@ -174,9 +174,9 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>;
def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>;
def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>;
def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;