mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-14 16:33:28 +00:00
R600/SI: Don't generate non-existent LSHL, LSHR, ASHR B32 variants on VI
This can happen when a REV instruction is commuted. The trick is to not define the _vi versions of these instructions, which has the following consequences: (1) code generation will always fail if a pseudo cannot be lowered (very useful for catching bugs where an unsupported instruction somehow makes it to the printer); (2) it becomes possible to query whether a pseudo can be lowered, which commuteOpcode does to prevent a REV opcode from being commuted to its non-REV form on VI. Tested-by: Michel Dänzer <michel.daenzer@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227990 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2ea95bd471
commit
a95296a86e
@ -407,11 +407,15 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
|
||||
int NewOpc;
|
||||
|
||||
// Try to map original to commuted opcode
|
||||
if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
|
||||
NewOpc = AMDGPU::getCommuteRev(Opcode);
|
||||
// Check if the commuted (REV) opcode exists on the target.
|
||||
if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
|
||||
return NewOpc;
|
||||
|
||||
// Try to map commuted to original opcode
|
||||
if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
|
||||
NewOpc = AMDGPU::getCommuteOrig(Opcode);
|
||||
// Check if the original (non-REV) opcode exists on the target.
|
||||
if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
|
||||
return NewOpc;
|
||||
|
||||
return Opcode;
|
||||
|
@ -944,15 +944,26 @@ multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
|
||||
def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
|
||||
VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||
|
||||
def _si : VOP3_Real_si <op.SI3,
|
||||
outs, ins, asm, opName>,
|
||||
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
|
||||
VOP3DisableFields<1, 0, HasMods>;
|
||||
|
||||
def _vi : VOP3_Real_vi <op.VI3,
|
||||
outs, ins, asm, opName>,
|
||||
def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
|
||||
VOP3DisableFields<1, 0, HasMods>;
|
||||
}
|
||||
|
||||
// VOP3SI_2_m - Emits the pseudo record and the `_si` real record for a VOP3
// instruction, and deliberately emits no `_vi` record.
//
//   op        - encoding bundle; only op.SI3 is read in this body.
//   outs/ins  - operand dags forwarded to both the pseudo and the `_si` real.
//   asm       - assembly string, used by the `_si` real only.
//   pattern   - selection pattern list, attached to the pseudo only.
//   opName    - base name passed to the pseudo and the real record.
//   revOp     - name combined with "_e64" and handed to VOP2_REV.
//   HasMods   - forwarded to VOP3DisableFields.
//   UseFullOp - accepted but not referenced anywhere in this body.
multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, string opName, string revOp,
|
||||
bit HasMods = 1, bit UseFullOp = 0> {
|
||||
|
||||
// Pseudo record (empty name suffix); the second VOP2_REV argument is true
// exactly when revOp and opName are the same string.
def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
|
||||
VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||
|
||||
// Real record for SI, built from the op.SI3 encoding.
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
|
||||
VOP3DisableFields<1, 0, HasMods>;
|
||||
|
||||
// No VI instruction. This class is for SI only.
|
||||
}
|
||||
|
||||
multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, string opName, string revOp,
|
||||
bit HasMods = 1, bit UseFullOp = 0> {
|
||||
@ -1072,6 +1083,21 @@ multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
|
||||
revOp, P.HasModifiers
|
||||
>;
|
||||
|
||||
// VOP2InstSI - Instantiates the `_e32` and `_e64` variants of a VOP2
// instruction through the SI-specific helper multiclasses VOP2SI_m and
// VOP3SI_2_m.
//
//   op     - vop2 encoding value, passed unchanged to both helpers.
//   opName - base instruction name.
//   P      - VOPProfile supplying Outs, Ins32/Ins64, Asm32/Asm64, the
//            Dst/Src0/Src1 value types and the HasModifiers flag.
//   node   - SDPatternOperator matched by the `_e64` pattern (default null_frag).
//   revOp  - name forwarded to both helpers as the reverse opcode name
//            (defaults to opName).
multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
|
||||
SDPatternOperator node = null_frag,
|
||||
string revOp = opName> {
|
||||
// `_e32` variant: instantiated with an empty pattern list.
defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>;
|
||||
|
||||
// `_e64` variant: carries the selection pattern for `node`. When
// P.HasModifiers is set the sources are wrapped in VOP3Mods0 / VOP3Mods;
// otherwise the plain two-operand pattern is used.
defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#"_e64"#P.Asm64,
|
||||
!if(P.HasModifiers,
|
||||
[(set P.DstVT:$dst,
|
||||
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
|
||||
i1:$clamp, i32:$omod)),
|
||||
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
|
||||
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
|
||||
opName, revOp, P.HasModifiers>;
|
||||
}
|
||||
|
||||
multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
|
||||
dag ins32, string asm32, list<dag> pat32,
|
||||
dag ins64, string asm64, list<dag> pat64,
|
||||
|
@ -1540,21 +1540,21 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m <
|
||||
// These instructions only exist on SI and CI
|
||||
let SubtargetPredicate = isSICI in {
|
||||
|
||||
defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32",
|
||||
defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
|
||||
VOP_F32_F32_F32, AMDGPUfmin_legacy
|
||||
>;
|
||||
defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "v_max_legacy_f32",
|
||||
defm V_MAX_LEGACY_F32 : VOP2InstSI <vop2<0xe>, "v_max_legacy_f32",
|
||||
VOP_F32_F32_F32, AMDGPUfmax_legacy
|
||||
>;
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_LSHR_B32 : VOP2Inst <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
|
||||
defm V_ASHR_I32 : VOP2Inst <vop2<0x17>, "v_ashr_i32",
|
||||
defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
|
||||
defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32",
|
||||
VOP_I32_I32_I32, sra
|
||||
>;
|
||||
|
||||
let hasPostISelHook = 1 in {
|
||||
defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
|
||||
defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
|
||||
}
|
||||
|
||||
} // End isCommutable = 1
|
||||
|
@ -1,6 +1,6 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI-CHECK %s
|
||||
|
||||
;EG-CHECK: {{^}}shl_v2i32:
|
||||
;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
@ -10,6 +10,10 @@
|
||||
;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
;VI-CHECK: {{^}}shl_v2i32:
|
||||
;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
|
||||
%a = load <2 x i32> addrspace(1) * %in
|
||||
@ -31,6 +35,12 @@ define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in
|
||||
;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
;VI-CHECK: {{^}}shl_v4i32:
|
||||
;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
|
||||
%a = load <4 x i32> addrspace(1) * %in
|
||||
@ -55,6 +65,9 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
|
||||
;SI-CHECK: {{^}}shl_i64:
|
||||
;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
;VI-CHECK: {{^}}shl_i64:
|
||||
;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
|
||||
%a = load i64 addrspace(1) * %in
|
||||
@ -90,6 +103,10 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
;VI-CHECK: {{^}}shl_v2i64:
|
||||
;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
|
||||
%a = load <2 x i64> addrspace(1) * %in
|
||||
@ -147,6 +164,12 @@ define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in
|
||||
;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
;VI-CHECK: {{^}}shl_v4i64:
|
||||
;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
|
||||
%a = load <4 x i64> addrspace(1) * %in
|
||||
|
@ -1,6 +1,6 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI-CHECK %s
|
||||
|
||||
;EG-CHECK-LABEL: {{^}}ashr_v2i32:
|
||||
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
@ -10,6 +10,10 @@
|
||||
;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
;VI-CHECK-LABEL: {{^}}ashr_v2i32:
|
||||
;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
|
||||
%a = load <2 x i32> addrspace(1) * %in
|
||||
@ -31,6 +35,12 @@ define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
|
||||
;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
;VI-CHECK-LABEL: {{^}}ashr_v4i32:
|
||||
;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
|
||||
%a = load <4 x i32> addrspace(1) * %in
|
||||
@ -45,6 +55,10 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
|
||||
|
||||
;SI-CHECK-LABEL: {{^}}ashr_i64:
|
||||
;SI-CHECK: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
|
||||
|
||||
;VI-CHECK-LABEL: {{^}}ashr_i64:
|
||||
;VI-CHECK: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
|
||||
|
||||
define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
%0 = sext i32 %in to i64
|
||||
@ -69,6 +83,10 @@ entry:
|
||||
|
||||
;SI-CHECK-LABEL: {{^}}ashr_i64_2:
|
||||
;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
;VI-CHECK-LABEL: {{^}}ashr_i64_2:
|
||||
;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
entry:
|
||||
%b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
|
||||
@ -109,6 +127,10 @@ entry:
|
||||
;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
;VI-CHECK-LABEL: {{^}}ashr_v2i64:
|
||||
;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
|
||||
%a = load <2 x i64> addrspace(1) * %in
|
||||
@ -174,6 +196,12 @@ define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
|
||||
;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
;VI-CHECK-LABEL: {{^}}ashr_v4i64:
|
||||
;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
|
||||
%a = load <4 x i64> addrspace(1) * %in
|
||||
|
Loading…
x
Reference in New Issue
Block a user