diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 485c4ab0498..3c863ebbd3a 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -407,11 +407,15 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const { int NewOpc; // Try to map original to commuted opcode - if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1) + NewOpc = AMDGPU::getCommuteRev(Opcode); + // Check if the commuted (REV) opcode exists on the target. + if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1) return NewOpc; // Try to map commuted to original opcode - if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1) + NewOpc = AMDGPU::getCommuteOrig(Opcode); + // Check if the original (non-REV) opcode exists on the target. + if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1) return NewOpc; return Opcode; diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index a8f73392a14..3159f9fbc3d 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -944,15 +944,26 @@ multiclass VOP3_2_m , VOP2_REV; - def _si : VOP3_Real_si , + def _si : VOP3_Real_si , VOP3DisableFields<1, 0, HasMods>; - def _vi : VOP3_Real_vi , + def _vi : VOP3_Real_vi , VOP3DisableFields<1, 0, HasMods>; } +multiclass VOP3SI_2_m pattern, string opName, string revOp, + bit HasMods = 1, bit UseFullOp = 0> { + + def "" : VOP3_Pseudo , + VOP2_REV; + + def _si : VOP3_Real_si , + VOP3DisableFields<1, 0, HasMods>; + + // No VI instruction. This class is for SI only. +} + multiclass VOP3b_2_m pattern, string opName, string revOp, bit HasMods = 1, bit UseFullOp = 0> { @@ -1072,6 +1083,21 @@ multiclass VOP2Inst ; +multiclass VOP2InstSI { + defm _e32 : VOP2SI_m ; + + defm _e64 : VOP3SI_2_m ; +} + multiclass VOP2b_Helper pat32, dag ins64, string asm64, list pat64, diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 75776c94301..39c3fd01507 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1540,21 +1540,21 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m < // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { -defm V_MIN_LEGACY_F32 : VOP2Inst , "v_min_legacy_f32", +defm V_MIN_LEGACY_F32 : VOP2InstSI , "v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy >; -defm V_MAX_LEGACY_F32 : VOP2Inst , "v_max_legacy_f32", +defm V_MAX_LEGACY_F32 : VOP2InstSI , "v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy >; let isCommutable = 1 in { -defm V_LSHR_B32 : VOP2Inst , "v_lshr_b32", VOP_I32_I32_I32, srl>; -defm V_ASHR_I32 : VOP2Inst , "v_ashr_i32", +defm V_LSHR_B32 : VOP2InstSI , "v_lshr_b32", VOP_I32_I32_I32, srl>; +defm V_ASHR_I32 : VOP2InstSI , "v_ashr_i32", VOP_I32_I32_I32, sra >; let hasPostISelHook = 1 in { -defm V_LSHL_B32 : VOP2Inst , "v_lshl_b32", VOP_I32_I32_I32, shl>; +defm V_LSHL_B32 : VOP2InstSI , "v_lshl_b32", VOP_I32_I32_I32, shl>; } } // End isCommutable = 1 diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll index 75341a2306f..ff2f0960b65 100644 --- a/test/CodeGen/R600/shl.ll +++ b/test/CodeGen/R600/shl.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI-CHECK %s ;EG-CHECK: {{^}}shl_v2i32: ;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -10,6 +10,10 @@ ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: {{^}}shl_v2i32: +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 %a = load <2 x i32> addrspace(1) * %in @@ -31,6 +35,12 @@ define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: {{^}}shl_v4i32: +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 %a = load <4 x i32> addrspace(1) * %in @@ -55,6 +65,9 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ;SI-CHECK: {{^}}shl_i64: ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: {{^}}shl_i64: +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 %a = load i64 addrspace(1) * %in @@ -90,6 +103,10 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: {{^}}shl_v2i64: +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 %a = load <2 x i64> addrspace(1) * %in @@ -147,6 +164,12 @@ define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: {{^}}shl_v4i64: +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 %a = load <4 x i64> addrspace(1) * %in diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll index f062e4c4e6f..44c11013056 100644 --- a/test/CodeGen/R600/sra.ll +++ b/test/CodeGen/R600/sra.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI-CHECK %s ;EG-CHECK-LABEL: {{^}}ashr_v2i32: ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -10,6 +10,10 @@ ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK-LABEL: {{^}}ashr_v2i32: +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 %a = load <2 x i32> addrspace(1) * %in @@ -31,6 +35,12 @@ define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK-LABEL: {{^}}ashr_v4i32: +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 %a = load <4 x i32> addrspace(1) * %in @@ -45,6 +55,10 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i ;SI-CHECK-LABEL: {{^}}ashr_i64: ;SI-CHECK: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8 + +;VI-CHECK-LABEL: {{^}}ashr_i64: +;VI-CHECK: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8 + define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) { entry: %0 = sext i32 %in to i64 @@ -69,6 +83,10 @@ entry: ;SI-CHECK-LABEL: {{^}}ashr_i64_2: ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +;VI-CHECK-LABEL: {{^}}ashr_i64_2: +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { entry: %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 @@ -109,6 +127,10 @@ entry: ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK-LABEL: {{^}}ashr_v2i64: +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 %a = load <2 x i64> addrspace(1) * %in @@ -174,6 +196,12 @@ define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK-LABEL: {{^}}ashr_v4i64: +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 %a = load <4 x i64> addrspace(1) * %in