diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 3c863ebbd3a..7e2c0bd9ec2 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -2046,6 +2046,24 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { swapOperands(Inst); } break; + case AMDGPU::S_LSHL_B64: + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + NewOpcode = AMDGPU::V_LSHLREV_B64; + swapOperands(Inst); + } + break; + case AMDGPU::S_ASHR_I64: + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + NewOpcode = AMDGPU::V_ASHRREV_I64; + swapOperands(Inst); + } + break; + case AMDGPU::S_LSHR_B64: + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + NewOpcode = AMDGPU::V_LSHRREV_B64; + swapOperands(Inst); + } + break; case AMDGPU::S_BFE_U64: case AMDGPU::S_BFM_B64: diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 3159f9fbc3d..0b0e47a1a9d 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -802,6 +802,7 @@ def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> { } def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; +def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>; def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 39c3fd01507..6bc6a82e7ad 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1805,6 +1805,20 @@ defm V_MULLIT_F32 : VOP3Inst , "v_mullit_f32", } // End SubtargetPredicate = isSICI +let SubtargetPredicate = isVI in { + +defm V_LSHLREV_B64 : VOP3Inst , "v_lshlrev_b64", + VOP_I64_I32_I64 +>; +defm V_LSHRREV_B64 : VOP3Inst , "v_lshrrev_b64", + VOP_I64_I32_I64 +>; +defm V_ASHRREV_I64 : VOP3Inst , "v_ashrrev_i64", + VOP_I64_I32_I64 +>; + +} // End SubtargetPredicate = isVI + //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/R600/rotl.i64.ll b/test/CodeGen/R600/rotl.i64.ll index f094ecef98f..6da17a4fea9 100644 --- a/test/CodeGen/R600/rotl.i64.ll +++ b/test/CodeGen/R600/rotl.i64.ll @@ -1,12 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=BOTH %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=BOTH %s -; FUNC-LABEL: {{^}}s_rotl_i64: -; SI-DAG: s_lshl_b64 -; SI-DAG: s_sub_i32 -; SI-DAG: s_lshr_b64 -; SI: s_or_b64 -; SI: s_endpgm +; BOTH-LABEL: {{^}}s_rotl_i64: +; BOTH-DAG: s_lshl_b64 +; BOTH-DAG: s_sub_i32 +; BOTH-DAG: s_lshr_b64 +; BOTH: s_or_b64 +; BOTH: s_endpgm define void @s_rotl_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) { entry: %0 = shl i64 %x, %y @@ -17,13 +17,15 @@ entry: ret void } -; FUNC-LABEL: {{^}}v_rotl_i64: +; BOTH-LABEL: {{^}}v_rotl_i64: ; SI-DAG: v_lshl_b64 -; SI-DAG: v_sub_i32 +; VI-DAG: v_lshlrev_b64 +; BOTH-DAG: v_sub_i32 ; SI: v_lshr_b64 -; SI: v_or_b32 -; SI: v_or_b32 -; SI: s_endpgm +; VI: v_lshrrev_b64 +; BOTH: v_or_b32 +; BOTH: v_or_b32 +; BOTH: s_endpgm define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) { entry: %x = load i64 addrspace(1)* %xptr, align 8 diff --git a/test/CodeGen/R600/rotr.i64.ll b/test/CodeGen/R600/rotr.i64.ll index a637f71921e..f1d1d265f36 100644 --- a/test/CodeGen/R600/rotr.i64.ll +++ b/test/CodeGen/R600/rotr.i64.ll @@ -1,11 +1,11 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=BOTH %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=BOTH %s -; FUNC-LABEL: {{^}}s_rotr_i64: -; SI-DAG: s_sub_i32 -; SI-DAG: s_lshr_b64 -; SI-DAG: s_lshl_b64 -; SI: s_or_b64 +; BOTH-LABEL: {{^}}s_rotr_i64: +; BOTH-DAG: s_sub_i32 +; BOTH-DAG: s_lshr_b64 +; BOTH-DAG: s_lshl_b64 +; BOTH: s_or_b64 define void @s_rotr_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) { entry: %tmp0 = sub i64 64, %y @@ -16,12 +16,14 @@ entry: ret void } -; FUNC-LABEL: {{^}}v_rotr_i64: -; SI-DAG: v_sub_i32 +; BOTH-LABEL: {{^}}v_rotr_i64: +; BOTH-DAG: v_sub_i32 ; SI-DAG: v_lshr_b64 ; SI-DAG: v_lshl_b64 -; SI: v_or_b32 -; SI: v_or_b32 +; VI-DAG: v_lshrrev_b64 +; VI-DAG: v_lshlrev_b64 +; BOTH: v_or_b32 +; BOTH: v_or_b32 define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) { entry: %x = load i64 addrspace(1)* %xptr, align 8 @@ -34,7 +36,7 @@ entry: ret void } -; FUNC-LABEL: {{^}}s_rotr_v2i64: +; BOTH-LABEL: {{^}}s_rotr_v2i64: define void @s_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> %x, <2 x i64> %y) { entry: %tmp0 = sub <2 x i64> , %y @@ -45,7 +47,7 @@ entry: ret void } -; FUNC-LABEL: {{^}}v_rotr_v2i64: +; BOTH-LABEL: {{^}}v_rotr_v2i64: define void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) { entry: %x = load <2 x i64> addrspace(1)* %xptr, align 8 diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll index ff2f0960b65..c6a18bf8e54 100644 --- a/test/CodeGen/R600/shl.ll +++ b/test/CodeGen/R600/shl.ll @@ -66,7 +66,7 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ;VI-CHECK: {{^}}shl_i64: -;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 @@ -104,8 +104,8 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ;VI-CHECK: {{^}}shl_v2i64: -;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 @@ -165,10 +165,10 @@ define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ;VI-CHECK: {{^}}shl_v4i64: -;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll index 44c11013056..7b461caa755 100644 --- a/test/CodeGen/R600/sra.ll +++ b/test/CodeGen/R600/sra.ll @@ -85,7 +85,7 @@ entry: ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ;VI-CHECK-LABEL: {{^}}ashr_i64_2: -;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { entry: @@ -128,8 +128,8 @@ entry: ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ;VI-CHECK-LABEL: {{^}}ashr_v2i64: -;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 @@ -197,10 +197,10 @@ define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ;VI-CHECK-LABEL: {{^}}ashr_v4i64: -;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1