mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
R600/SI: Enable a lot of existing tests for VI (squashed commits)
This is a union of these commits:

* R600/SI: Enable more tests for VI which need no changes

* R600/SI: Enable V_BCNT tests for VI

  Differences:
  - v_bcnt_..._e32 -> _e64
  - s_load_dword* inline offset is in bytes instead of dwords

* R600/SI: Enable all tests for VI which use S_LOAD_DWORD

  The inline offset is changed from dwords to bytes.

* R600/SI: Enable LDS tests for VI

  Differences:
  - the s_load_dword inline offset changed from dwords to bytes
  - the tests checked very little on CI, so they have been fixed to check
    all instructions that "SI" checked

* R600/SI: Enable lshr tests for VI

* R600/SI: Fix divrem64 tests

  - "v_lshl_64" was missing "b" before "64"
  - added VI-NOT checks

* R600/SI: Enable the SI.tid test for VI

* R600/SI: Enable the frem test for VI

  Also, the frem_f64 checking is added for CI-VI.

* R600/SI: Add VI tests for rsq.clamped

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228830 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f033db57e9
commit
c0021e43ea
@ -1,9 +1,12 @@
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=SI
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI
|
||||
|
||||
; SI: {{^}}f64_kernel_arg:
|
||||
; GCN: {{^}}f64_kernel_arg:
|
||||
; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
|
||||
; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
|
||||
; SI: buffer_store_dwordx2
|
||||
; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x24
|
||||
; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c
|
||||
; GCN: buffer_store_dwordx2
|
||||
define void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
|
||||
entry:
|
||||
store double %in, double addrspace(1)* %out
|
||||
|
@ -1,14 +1,17 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SICI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SICI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
|
||||
; SI: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
|
||||
; SI: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
|
||||
; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
|
||||
; GCN: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
|
||||
@ -18,16 +21,18 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset:
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
|
||||
; SI: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
|
||||
; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
|
||||
; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SICI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
|
||||
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
|
||||
; GCN: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
|
||||
; GCN: buffer_store_dwordx2 [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
|
||||
@ -38,8 +43,8 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset
|
||||
; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; CI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
|
||||
; SI: s_endpgm
|
||||
; CIVI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
|
||||
%sub = sub i32 %a, %b
|
||||
%add = add i32 %sub, 4
|
||||
@ -51,13 +56,15 @@ define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i3
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset:
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
|
||||
; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
|
||||
; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
|
||||
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
|
||||
; SI: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
|
||||
; SI: s_endpgm
|
||||
; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
|
||||
; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
|
||||
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
|
||||
; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x28
|
||||
; GCN-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
|
||||
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
|
||||
; GCN: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
|
||||
@ -66,15 +73,17 @@ define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %sw
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset:
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
|
||||
; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
|
||||
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
|
||||
; SI: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
|
||||
; SI: s_endpgm
|
||||
; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
|
||||
; SICI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
|
||||
; VI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
|
||||
; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
|
||||
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
|
||||
; GCN: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
||||
@ -8,11 +9,11 @@ declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
|
||||
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
|
||||
|
||||
; FUNC-LABEL: {{^}}s_ctpop_i32:
|
||||
; SI: s_load_dword [[SVAL:s[0-9]+]],
|
||||
; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
|
||||
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
|
||||
; SI: buffer_store_dword [[VRESULT]],
|
||||
; SI: s_endpgm
|
||||
; GCN: s_load_dword [[SVAL:s[0-9]+]],
|
||||
; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
|
||||
; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
|
||||
; GCN: buffer_store_dword [[VRESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
|
||||
@ -23,10 +24,10 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
|
||||
|
||||
; XXX - Why 0 in register?
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i32:
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
|
||||
@ -37,12 +38,13 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
|
||||
; SI: buffer_load_dword [[VAL1:v[0-9]+]],
|
||||
; SI: buffer_load_dword [[VAL0:v[0-9]+]],
|
||||
; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
|
||||
; GCN: buffer_load_dword [[VAL1:v[0-9]+]],
|
||||
; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
|
||||
; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
|
||||
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
; EG: BCNT_INT
|
||||
@ -57,11 +59,11 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
|
||||
; SI: buffer_load_dword [[VAL0:v[0-9]+]],
|
||||
; SI-NEXT: s_waitcnt
|
||||
; SI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
|
||||
; SI-NEXT: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
|
||||
; GCN-NEXT: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
|
||||
%val0 = load i32 addrspace(1)* %in0, align 4
|
||||
%ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
|
||||
@ -71,9 +73,9 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_v2i32:
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: s_endpgm
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
; EG: BCNT_INT
|
||||
@ -85,11 +87,11 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_v4i32:
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: s_endpgm
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
; EG: BCNT_INT
|
||||
@ -103,15 +105,15 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_v8i32:
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: s_endpgm
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
; EG: BCNT_INT
|
||||
@ -129,23 +131,23 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_v16i32:
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: v_bcnt_u32_b32_e64
|
||||
; SI: s_endpgm
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: v_bcnt_u32_b32_e64
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
; EG: BCNT_INT
|
||||
@ -171,10 +173,10 @@ define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> ad
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
|
||||
@ -186,10 +188,10 @@ define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
|
||||
@ -201,11 +203,12 @@ define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out,
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; GCN: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
|
||||
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
|
||||
%val = load i32 addrspace(1)* %in, align 4
|
||||
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
|
||||
@ -215,11 +218,11 @@ define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspa
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
|
||||
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
|
||||
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
|
||||
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
|
||||
@ -231,11 +234,11 @@ define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
|
||||
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
|
||||
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
|
||||
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
|
||||
@ -247,11 +250,12 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
|
||||
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
|
||||
; SI-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
|
||||
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
|
||||
; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
|
||||
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BCNT_INT
|
||||
define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
|
||||
@ -269,10 +273,11 @@ define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrsp
|
||||
|
||||
; FUNC-LABEL: {{^}}ctpop_i32_in_br:
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
|
||||
; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
|
||||
; SI: v_mov_b32_e32 [[RESULT]], [[SRESULT]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34
|
||||
; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
|
||||
; GCN: v_mov_b32_e32 [[RESULT]], [[SRESULT]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
; EG: BCNT_INT
|
||||
define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
|
||||
entry:
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
|
||||
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
|
||||
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
|
||||
@ -8,10 +9,11 @@ declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone
|
||||
|
||||
; FUNC-LABEL: {{^}}s_ctpop_i64:
|
||||
; SI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]]
|
||||
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
|
||||
; SI: buffer_store_dword [[VRESULT]],
|
||||
; SI: s_endpgm
|
||||
; VI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; GCN: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]]
|
||||
; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
|
||||
; GCN: buffer_store_dword [[VRESULT]],
|
||||
; GCN: s_endpgm
|
||||
define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
|
||||
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
|
||||
%truncctpop = trunc i64 %ctpop to i32
|
||||
@ -20,11 +22,12 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_i64:
|
||||
; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
|
||||
; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
|
||||
; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
|
||||
; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
|
||||
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
|
||||
%val = load i64 addrspace(1)* %in, align 8
|
||||
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
|
||||
@ -34,9 +37,9 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_ctpop_v2i64:
|
||||
; SI: s_bcnt1_i32_b64
|
||||
; SI: s_bcnt1_i32_b64
|
||||
; SI: s_endpgm
|
||||
; GCN: s_bcnt1_i32_b64
|
||||
; GCN: s_bcnt1_i32_b64
|
||||
; GCN: s_endpgm
|
||||
define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind {
|
||||
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
|
||||
%truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
|
||||
@ -45,11 +48,11 @@ define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val)
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_ctpop_v4i64:
|
||||
; SI: s_bcnt1_i32_b64
|
||||
; SI: s_bcnt1_i32_b64
|
||||
; SI: s_bcnt1_i32_b64
|
||||
; SI: s_bcnt1_i32_b64
|
||||
; SI: s_endpgm
|
||||
; GCN: s_bcnt1_i32_b64
|
||||
; GCN: s_bcnt1_i32_b64
|
||||
; GCN: s_bcnt1_i32_b64
|
||||
; GCN: s_bcnt1_i32_b64
|
||||
; GCN: s_endpgm
|
||||
define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind {
|
||||
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
|
||||
%truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
|
||||
@ -58,11 +61,11 @@ define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val)
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_v2i64:
|
||||
; SI: v_bcnt_u32_b32
|
||||
; SI: v_bcnt_u32_b32
|
||||
; SI: v_bcnt_u32_b32
|
||||
; SI: v_bcnt_u32_b32
|
||||
; SI: s_endpgm
|
||||
; GCN: v_bcnt_u32_b32
|
||||
; GCN: v_bcnt_u32_b32
|
||||
; GCN: v_bcnt_u32_b32
|
||||
; GCN: v_bcnt_u32_b32
|
||||
; GCN: s_endpgm
|
||||
define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
|
||||
%val = load <2 x i64> addrspace(1)* %in, align 16
|
||||
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
|
||||
@ -72,15 +75,15 @@ define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrs
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctpop_v4i64:
|
||||
; SI: v_bcnt_u32_b32
|
||||
; SI: v_bcnt_u32_b32
|
||||
; SI: v_bcnt_u32_b32
|
||||
; SI: v_bcnt_u32_b32
|
||||
; SI: v_bcnt_u32_b32
|
||||
; SI: v_bcnt_u32_b32
|
||||
; SI: v_bcnt_u32_b32
|
||||
; SI: v_bcnt_u32_b32
|
||||
; SI: s_endpgm
|
||||
; GCN: v_bcnt_u32_b32
|
||||
; GCN: v_bcnt_u32_b32
|
||||
; GCN: v_bcnt_u32_b32
|
||||
; GCN: v_bcnt_u32_b32
|
||||
; GCN: v_bcnt_u32_b32
|
||||
; GCN: v_bcnt_u32_b32
|
||||
; GCN: v_bcnt_u32_b32
|
||||
; GCN: v_bcnt_u32_b32
|
||||
; GCN: s_endpgm
|
||||
define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
|
||||
%val = load <4 x i64> addrspace(1)* %in, align 32
|
||||
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
|
||||
@ -94,11 +97,12 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs
|
||||
|
||||
; FUNC-LABEL: {{^}}ctpop_i64_in_br:
|
||||
; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
|
||||
; SI: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
|
||||
; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
|
||||
; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
|
||||
; SI: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
|
||||
; SI: s_endpgm
|
||||
; VI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x34
|
||||
; GCN: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
|
||||
; GCN: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
|
||||
; GCN: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
|
||||
; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
|
||||
; GCN: s_endpgm
|
||||
define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
|
||||
entry:
|
||||
%tmp0 = icmp eq i32 %cond, 0
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
|
||||
|
||||
|
||||
@ -10,7 +11,7 @@
|
||||
; R600-NOT: AND
|
||||
; R600: |PV.{{[XYZW]}}|
|
||||
|
||||
; SI: v_and_b32
|
||||
; GCN: v_and_b32
|
||||
|
||||
define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
|
||||
%bc= bitcast i32 %in to float
|
||||
@ -23,7 +24,7 @@ define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
|
||||
; R600-NOT: AND
|
||||
; R600: |PV.{{[XYZW]}}|
|
||||
|
||||
; SI: v_and_b32
|
||||
; GCN: v_and_b32
|
||||
|
||||
define void @fabs_free(float addrspace(1)* %out, i32 %in) {
|
||||
%bc= bitcast i32 %in to float
|
||||
@ -35,7 +36,7 @@ define void @fabs_free(float addrspace(1)* %out, i32 %in) {
|
||||
; FUNC-LABEL: {{^}}fabs_f32:
|
||||
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
|
||||
|
||||
; SI: v_and_b32
|
||||
; GCN: v_and_b32
|
||||
define void @fabs_f32(float addrspace(1)* %out, float %in) {
|
||||
%fabs = call float @llvm.fabs.f32(float %in)
|
||||
store float %fabs, float addrspace(1)* %out
|
||||
@ -46,8 +47,8 @@ define void @fabs_f32(float addrspace(1)* %out, float %in) {
|
||||
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
|
||||
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
|
||||
|
||||
; SI: v_and_b32
|
||||
; SI: v_and_b32
|
||||
; GCN: v_and_b32
|
||||
; GCN: v_and_b32
|
||||
define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
|
||||
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
|
||||
store <2 x float> %fabs, <2 x float> addrspace(1)* %out
|
||||
@ -60,20 +61,21 @@ define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
|
||||
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
|
||||
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
|
||||
|
||||
; SI: v_and_b32
|
||||
; SI: v_and_b32
|
||||
; SI: v_and_b32
|
||||
; SI: v_and_b32
|
||||
; GCN: v_and_b32
|
||||
; GCN: v_and_b32
|
||||
; GCN: v_and_b32
|
||||
; GCN: v_and_b32
|
||||
define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
|
||||
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
|
||||
store <4 x float> %fabs, <4 x float> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}fabs_fn_fold:
|
||||
; GCN-LABEL: {{^}}fabs_fn_fold:
|
||||
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
|
||||
; SI-NOT: and
|
||||
; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
|
||||
; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
|
||||
; GCN-NOT: and
|
||||
; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
|
||||
define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
|
||||
%fabs = call float @fabs(float %in0)
|
||||
%fmul = fmul float %fabs, %in1
|
||||
@ -81,10 +83,11 @@ define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}fabs_fold:
|
||||
; GCN-LABEL: {{^}}fabs_fold:
|
||||
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
|
||||
; SI-NOT: and
|
||||
; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
|
||||
; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
|
||||
; GCN-NOT: and
|
||||
; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
|
||||
define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
|
||||
%fabs = call float @llvm.fabs.f32(float %in0)
|
||||
%fmul = fmul float %fabs, %in1
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
|
||||
declare double @llvm.ceil.f64(double) nounwind readnone
|
||||
declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
|
||||
@ -10,12 +11,14 @@ declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind read
|
||||
; FUNC-LABEL: {{^}}test_copysign_f32:
|
||||
; SI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0xb
|
||||
; SI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0xc
|
||||
; SI-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
|
||||
; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
|
||||
; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; VI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0x2c
|
||||
; VI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0x30
|
||||
; GCN-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
|
||||
; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
|
||||
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BFI_INT
|
||||
define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind {
|
||||
@ -25,7 +28,7 @@ define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_copysign_v2f32:
|
||||
; SI: s_endpgm
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BFI_INT
|
||||
; EG: BFI_INT
|
||||
@ -36,7 +39,7 @@ define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %ma
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_copysign_v4f32:
|
||||
; SI: s_endpgm
|
||||
; GCN: s_endpgm
|
||||
|
||||
; EG: BFI_INT
|
||||
; EG: BFI_INT
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
|
||||
declare double @llvm.copysign.f64(double, double) nounwind readnone
|
||||
declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone
|
||||
@ -7,13 +8,15 @@ declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind r
|
||||
; FUNC-LABEL: {{^}}test_copysign_f64:
|
||||
; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
|
||||
; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
|
||||
; SI: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
|
||||
; SI: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
|
||||
; SI: s_endpgm
|
||||
; VI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
|
||||
; GCN-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
|
||||
; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
|
||||
; GCN: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
|
||||
; GCN: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
|
||||
; GCN: s_endpgm
|
||||
define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind {
|
||||
%result = call double @llvm.copysign.f64(double %mag, double %sign)
|
||||
store double %result, double addrspace(1)* %out, align 8
|
||||
@ -21,7 +24,7 @@ define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %s
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_copysign_v2f64:
|
||||
; SI: s_endpgm
|
||||
; GCN: s_endpgm
|
||||
define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind {
|
||||
%result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
|
||||
store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8
|
||||
@ -29,7 +32,7 @@ define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_copysign_v4f64:
|
||||
; SI: s_endpgm
|
||||
; GCN: s_endpgm
|
||||
define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind {
|
||||
%result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
|
||||
store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
|
||||
declare double @llvm.floor.f64(double) nounwind readnone
|
||||
declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
|
||||
|
@ -1,7 +1,8 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}fneg_f64:
|
||||
; SI: v_xor_b32
|
||||
; GCN: v_xor_b32
|
||||
define void @fneg_f64(double addrspace(1)* %out, double %in) {
|
||||
%fneg = fsub double -0.000000e+00, %in
|
||||
store double %fneg, double addrspace(1)* %out
|
||||
@ -9,8 +10,8 @@ define void @fneg_f64(double addrspace(1)* %out, double %in) {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}fneg_v2f64:
|
||||
; SI: v_xor_b32
|
||||
; SI: v_xor_b32
|
||||
; GCN: v_xor_b32
|
||||
; GCN: v_xor_b32
|
||||
define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
|
||||
%fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
|
||||
store <2 x double> %fneg, <2 x double> addrspace(1)* %out
|
||||
@ -23,10 +24,10 @@ define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double>
|
||||
; R600: -PV
|
||||
; R600: -PV
|
||||
|
||||
; SI: v_xor_b32
|
||||
; SI: v_xor_b32
|
||||
; SI: v_xor_b32
|
||||
; SI: v_xor_b32
|
||||
; GCN: v_xor_b32
|
||||
; GCN: v_xor_b32
|
||||
; GCN: v_xor_b32
|
||||
; GCN: v_xor_b32
|
||||
define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
|
||||
%fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
|
||||
store <4 x double> %fneg, <4 x double> addrspace(1)* %out
|
||||
@ -39,7 +40,7 @@ define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double>
|
||||
|
||||
; FUNC-LABEL: {{^}}fneg_free_f64:
|
||||
; FIXME: Unnecessary copy to VGPRs
|
||||
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
|
||||
; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
|
||||
define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
|
||||
%bc = bitcast i64 %in to double
|
||||
%fsub = fsub double 0.0, %bc
|
||||
@ -47,10 +48,11 @@ define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}fneg_fold_f64:
|
||||
; GCN-LABEL: {{^}}fneg_fold_f64:
|
||||
; SI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-NOT: xor
|
||||
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
|
||||
; VI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; GCN-NOT: xor
|
||||
; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
|
||||
define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
|
||||
%fsub = fsub double -0.0, %in
|
||||
%fmul = fmul double %fsub, %in
|
||||
|
@ -1,10 +1,11 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}fneg_f32:
|
||||
; R600: -PV
|
||||
|
||||
; SI: v_xor_b32
|
||||
; GCN: v_xor_b32
|
||||
define void @fneg_f32(float addrspace(1)* %out, float %in) {
|
||||
%fneg = fsub float -0.000000e+00, %in
|
||||
store float %fneg, float addrspace(1)* %out
|
||||
@ -15,8 +16,8 @@ define void @fneg_f32(float addrspace(1)* %out, float %in) {
|
||||
; R600: -PV
|
||||
; R600: -PV
|
||||
|
||||
; SI: v_xor_b32
|
||||
; SI: v_xor_b32
|
||||
; GCN: v_xor_b32
|
||||
; GCN: v_xor_b32
|
||||
define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
|
||||
%fneg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
|
||||
store <2 x float> %fneg, <2 x float> addrspace(1)* %out
|
||||
@ -29,10 +30,10 @@ define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %i
|
||||
; R600: -PV
|
||||
; R600: -PV
|
||||
|
||||
; SI: v_xor_b32
|
||||
; SI: v_xor_b32
|
||||
; SI: v_xor_b32
|
||||
; SI: v_xor_b32
|
||||
; GCN: v_xor_b32
|
||||
; GCN: v_xor_b32
|
||||
; GCN: v_xor_b32
|
||||
; GCN: v_xor_b32
|
||||
define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
|
||||
%fneg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
|
||||
store <4 x float> %fneg, <4 x float> addrspace(1)* %out
|
||||
@ -48,7 +49,7 @@ define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %i
|
||||
; R600: -KC0[2].Z
|
||||
|
||||
; XXX: We could use v_add_f32_e64 with the negate bit here instead.
|
||||
; SI: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
|
||||
; GCN: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
|
||||
define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
|
||||
%bc = bitcast i32 %in to float
|
||||
%fsub = fsub float 0.0, %bc
|
||||
@ -58,8 +59,9 @@ define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
|
||||
|
||||
; FUNC-LABEL: {{^}}fneg_fold_f32:
|
||||
; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
|
||||
; SI-NOT: xor
|
||||
; SI: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
|
||||
; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
|
||||
; GCN-NOT: xor
|
||||
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
|
||||
define void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
|
||||
%fsub = fsub float -0.0, %in
|
||||
%fmul = fmul float %fsub, %in
|
||||
|
@ -1,16 +1,18 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}frem_f32:
|
||||
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
|
||||
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
|
||||
; SI-DAG: v_cmp
|
||||
; SI-DAG: v_mul_f32
|
||||
; SI: v_rcp_f32_e32
|
||||
; SI: v_mul_f32_e32
|
||||
; SI: v_mul_f32_e32
|
||||
; SI: v_trunc_f32_e32
|
||||
; SI: v_mad_f32
|
||||
; SI: s_endpgm
|
||||
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
|
||||
; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
|
||||
; GCN-DAG: v_cmp
|
||||
; GCN-DAG: v_mul_f32
|
||||
; GCN: v_rcp_f32_e32
|
||||
; GCN: v_mul_f32_e32
|
||||
; GCN: v_mul_f32_e32
|
||||
; GCN: v_trunc_f32_e32
|
||||
; GCN: v_mad_f32
|
||||
; GCN: s_endpgm
|
||||
define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) #0 {
|
||||
%gep2 = getelementptr float addrspace(1)* %in2, i32 4
|
||||
@ -22,14 +24,14 @@ define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}unsafe_frem_f32:
|
||||
; SI: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
|
||||
; SI: buffer_load_dword [[X:v[0-9]+]], {{.*}}
|
||||
; SI: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
|
||||
; SI: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
|
||||
; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
|
||||
; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
; GCN: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
|
||||
; GCN: buffer_load_dword [[X:v[0-9]+]], {{.*}}
|
||||
; GCN: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
|
||||
; GCN: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
|
||||
; GCN: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
|
||||
; GCN: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
; GCN: s_endpgm
|
||||
define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) #1 {
|
||||
%gep2 = getelementptr float addrspace(1)* %in2, i32 4
|
||||
@ -40,11 +42,19 @@ define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
|
||||
ret void
|
||||
}
|
||||
|
||||
; TODO: This should check something when f64 fdiv is implemented
|
||||
; correctly
|
||||
|
||||
; FUNC-LABEL: {{^}}frem_f64:
|
||||
; SI: s_endpgm
|
||||
; GCN: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0
|
||||
; GCN: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0
|
||||
; TODO: Check SI.
|
||||
; CI: v_rcp_f64_e32 [[INVY:v\[[0-9]+:[0-9]+\]]], [[Y]]
|
||||
; CI: v_mul_f64 [[DIV:v\[[0-9]+:[0-9]+\]]], [[X]], [[INVY]]
|
||||
; CI: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[DIV]]
|
||||
; CI: v_mul_f64 [[RESULTM:v\[[0-9]+:[0-9]+\]]], [[TRUNC]], [[Y]]
|
||||
; SI: v_mul_f64 [[RESULTM:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, [[Y]]
|
||||
; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], -[[RESULTM]]
|
||||
; GCN: buffer_store_dwordx2 [[RESULT]], {{.*}}, 0
|
||||
; GCN: s_endpgm
|
||||
define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
double addrspace(1)* %in2) #0 {
|
||||
%r0 = load double addrspace(1)* %in1, align 8
|
||||
@ -55,11 +65,12 @@ define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}unsafe_frem_f64:
|
||||
; SI: v_rcp_f64_e32
|
||||
; SI: v_mul_f64
|
||||
; GCN: v_rcp_f64_e32
|
||||
; GCN: v_mul_f64
|
||||
; SI: v_bfe_u32
|
||||
; SI: v_fma_f64
|
||||
; SI: s_endpgm
|
||||
; CI: v_trunc_f64_e32
|
||||
; GCN: v_fma_f64
|
||||
; GCN: s_endpgm
|
||||
define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
double addrspace(1)* %in2) #1 {
|
||||
%r0 = load double addrspace(1)* %in1, align 8
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
|
||||
declare double @llvm.trunc.f64(double) nounwind readnone
|
||||
declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s
|
||||
|
||||
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
|
||||
; CHECK-LABEL: {{^}}use_gep_address_space:
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
|
||||
@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
|
||||
@ -9,6 +10,7 @@
|
||||
; FUNC-LABEL: {{^}}float:
|
||||
; FIXME: We should be using s_load_dword here.
|
||||
; SI: buffer_load_dword
|
||||
; VI: s_load_dword
|
||||
|
||||
; EG-DAG: MOV {{\** *}}T2.X
|
||||
; EG-DAG: MOV {{\** *}}T3.X
|
||||
@ -31,6 +33,7 @@ entry:
|
||||
|
||||
; FIXME: We should be using s_load_dword here.
|
||||
; SI: buffer_load_dword
|
||||
; VI: s_load_dword
|
||||
|
||||
; EG-DAG: MOV {{\** *}}T2.X
|
||||
; EG-DAG: MOV {{\** *}}T3.X
|
||||
@ -53,7 +56,7 @@ entry:
|
||||
@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
|
||||
|
||||
; FUNC-LABEL: {{^}}struct_foo_gv_load:
|
||||
; SI: s_load_dword
|
||||
; GCN: s_load_dword
|
||||
|
||||
define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
|
||||
%gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
|
||||
@ -70,6 +73,7 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
|
||||
; FUNC-LABEL: {{^}}array_v1_gv_load:
|
||||
; FIXME: We should be using s_load_dword here.
|
||||
; SI: buffer_load_dword
|
||||
; VI: s_load_dword
|
||||
define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
|
||||
%gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
|
||||
%load = load <1 x i32> addrspace(2)* %gep, align 4
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=CHECK %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CHECK %s
|
||||
|
||||
; Use a 64-bit value with lo bits that can be represented as an inline constant
|
||||
; CHECK-LABEL: {{^}}i64_imm_inline_lo:
|
||||
@ -303,7 +304,8 @@ define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -313,7 +315,8 @@ define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.5, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -323,7 +326,8 @@ define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -0.5, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -333,7 +337,8 @@ define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -343,7 +348,8 @@ define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1.0, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -353,7 +359,8 @@ define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2.0, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -363,7 +370,8 @@ define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2.0, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -373,7 +381,8 @@ define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 4.0, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -383,7 +392,8 @@ define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -4.0, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -394,7 +404,8 @@ define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
|
||||
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_1_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -404,7 +415,8 @@ define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_2_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -414,7 +426,8 @@ define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_16_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 16, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -424,7 +437,8 @@ define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -434,7 +448,8 @@ define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -444,7 +459,8 @@ define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -16, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -454,7 +470,8 @@ define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_63_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) {
|
||||
@ -464,7 +481,8 @@ define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_64_f64
|
||||
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 64, [[VAL]]
|
||||
; CHECK: buffer_store_dwordx2 [[REG]]
|
||||
define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {
|
||||
|
@ -1,11 +1,11 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG
|
||||
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
|
||||
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
|
||||
|
||||
; EG-LABEL: {{^}}i8_arg:
|
||||
; FUNC-LABEL: {{^}}i8_arg:
|
||||
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
|
||||
; SI-LABEL: {{^}}i8_arg:
|
||||
; SI: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
|
||||
define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
|
||||
entry:
|
||||
@ -14,10 +14,10 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}i8_zext_arg:
|
||||
; FUNC-LABEL: {{^}}i8_zext_arg:
|
||||
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
|
||||
; SI-LABEL: {{^}}i8_zext_arg:
|
||||
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
|
||||
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
|
||||
|
||||
define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
|
||||
entry:
|
||||
@ -26,10 +26,10 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}i8_sext_arg:
|
||||
; FUNC-LABEL: {{^}}i8_sext_arg:
|
||||
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
|
||||
; SI-LABEL: {{^}}i8_sext_arg:
|
||||
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
|
||||
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
|
||||
|
||||
define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
|
||||
entry:
|
||||
@ -38,10 +38,9 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}i16_arg:
|
||||
; FUNC-LABEL: {{^}}i16_arg:
|
||||
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
|
||||
; SI-LABEL: {{^}}i16_arg:
|
||||
; SI: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
|
||||
define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
|
||||
entry:
|
||||
@ -50,10 +49,10 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}i16_zext_arg:
|
||||
; FUNC-LABEL: {{^}}i16_zext_arg:
|
||||
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
|
||||
; SI-LABEL: {{^}}i16_zext_arg:
|
||||
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
|
||||
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
|
||||
|
||||
define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
|
||||
entry:
|
||||
@ -62,10 +61,10 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}i16_sext_arg:
|
||||
; FUNC-LABEL: {{^}}i16_sext_arg:
|
||||
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
|
||||
; SI-LABEL: {{^}}i16_sext_arg:
|
||||
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
|
||||
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
|
||||
|
||||
define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
|
||||
entry:
|
||||
@ -74,176 +73,170 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}i32_arg:
|
||||
; FUNC-LABEL: {{^}}i32_arg:
|
||||
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
|
||||
; SI-LABEL: {{^}}i32_arg:
|
||||
; s_load_dword s{{[0-9]}}, s[0:1], 0xb
|
||||
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
|
||||
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
|
||||
; i32_arg stores its i32 argument %in through the pointer %out (align 4)
; and returns. The body contains no other instructions.
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
|
||||
entry:
|
||||
store i32 %in, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}f32_arg:
|
||||
; FUNC-LABEL: {{^}}f32_arg:
|
||||
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
|
||||
; SI-LABEL: {{^}}f32_arg:
|
||||
; s_load_dword s{{[0-9]}}, s[0:1], 0xb
|
||||
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
|
||||
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
|
||||
; f32_arg stores its float argument %in through the pointer %out (align 4)
; and returns. The body contains no other instructions.
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
|
||||
entry:
|
||||
store float %in, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v2i8_arg:
|
||||
; FUNC-LABEL: {{^}}v2i8_arg:
|
||||
; EG: VTX_READ_8
|
||||
; EG: VTX_READ_8
|
||||
; SI-LABEL: {{^}}v2i8_arg:
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; v2i8_arg stores its <2 x i8> argument %in through the pointer %out
; (no explicit alignment on the store) and returns.
define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
|
||||
entry:
|
||||
store <2 x i8> %in, <2 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v2i16_arg:
|
||||
; FUNC-LABEL: {{^}}v2i16_arg:
|
||||
; EG: VTX_READ_16
|
||||
; EG: VTX_READ_16
|
||||
; SI-LABEL: {{^}}v2i16_arg:
|
||||
; SI-DAG: buffer_load_ushort
|
||||
; SI-DAG: buffer_load_ushort
|
||||
; GCN-DAG: buffer_load_ushort
|
||||
; GCN-DAG: buffer_load_ushort
|
||||
; v2i16_arg stores its <2 x i16> argument %in through the pointer %out
; (no explicit alignment on the store) and returns.
define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
|
||||
entry:
|
||||
store <2 x i16> %in, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v2i32_arg:
|
||||
; FUNC-LABEL: {{^}}v2i32_arg:
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
|
||||
; SI-LABEL: {{^}}v2i32_arg:
|
||||
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
|
||||
; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
|
||||
; v2i32_arg stores its <2 x i32> argument %in through the pointer %out
; (align 4) and returns.
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
|
||||
entry:
|
||||
store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v2f32_arg:
|
||||
; FUNC-LABEL: {{^}}v2f32_arg:
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
|
||||
; SI-LABEL: {{^}}v2f32_arg:
|
||||
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
|
||||
; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
|
||||
; v2f32_arg stores its <2 x float> argument %in through the pointer %out
; (align 4) and returns.
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
|
||||
entry:
|
||||
store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v3i8_arg:
|
||||
; FUNC-LABEL: {{^}}v3i8_arg:
|
||||
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
|
||||
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
|
||||
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
|
||||
; SI-LABEL: {{^}}v3i8_arg:
|
||||
; v3i8_arg stores its <3 x i8> argument %in through the pointer %out
; (align 4) and returns. The VTX_READ_8 lines above carry no check prefix.
define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
|
||||
entry:
|
||||
store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v3i16_arg:
|
||||
; FUNC-LABEL: {{^}}v3i16_arg:
|
||||
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
|
||||
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
|
||||
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
|
||||
; SI-LABEL: {{^}}v3i16_arg:
|
||||
; v3i16_arg stores its <3 x i16> argument %in through the pointer %out
; (align 4) and returns. The VTX_READ_16 lines above carry no check prefix.
define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
|
||||
entry:
|
||||
store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
; EG-LABEL: {{^}}v3i32_arg:
|
||||
; FUNC-LABEL: {{^}}v3i32_arg:
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
|
||||
; SI-LABEL: {{^}}v3i32_arg:
|
||||
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
|
||||
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
|
||||
; v3i32_arg stores its <3 x i32> argument %in through the pointer %out
; (align 4) and returns.
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
|
||||
entry:
|
||||
store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v3f32_arg:
|
||||
; FUNC-LABEL: {{^}}v3f32_arg:
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
|
||||
; SI-LABEL: {{^}}v3f32_arg:
|
||||
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
|
||||
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
|
||||
; v3f32_arg stores its <3 x float> argument %in through the pointer %out
; (align 4) and returns.
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
|
||||
entry:
|
||||
store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v4i8_arg:
|
||||
; FUNC-LABEL: {{^}}v4i8_arg:
|
||||
; EG: VTX_READ_8
|
||||
; EG: VTX_READ_8
|
||||
; EG: VTX_READ_8
|
||||
; EG: VTX_READ_8
|
||||
; SI-LABEL: {{^}}v4i8_arg:
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; v4i8_arg stores its <4 x i8> argument %in through the pointer %out
; (no explicit alignment on the store) and returns.
define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
|
||||
entry:
|
||||
store <4 x i8> %in, <4 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v4i16_arg:
|
||||
; FUNC-LABEL: {{^}}v4i16_arg:
|
||||
; EG: VTX_READ_16
|
||||
; EG: VTX_READ_16
|
||||
; EG: VTX_READ_16
|
||||
; EG: VTX_READ_16
|
||||
; SI-LABEL: {{^}}v4i16_arg:
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; v4i16_arg stores its <4 x i16> argument %in through the pointer %out
; (no explicit alignment on the store) and returns.
define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
|
||||
entry:
|
||||
store <4 x i16> %in, <4 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v4i32_arg:
|
||||
; FUNC-LABEL: {{^}}v4i32_arg:
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
|
||||
; SI-LABEL: {{^}}v4i32_arg:
|
||||
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
|
||||
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
|
||||
; v4i32_arg stores its <4 x i32> argument %in through the pointer %out
; (align 4) and returns.
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
|
||||
entry:
|
||||
store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v4f32_arg:
|
||||
; FUNC-LABEL: {{^}}v4f32_arg:
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
|
||||
; SI-LABEL: {{^}}v4f32_arg:
|
||||
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
|
||||
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
|
||||
; v4f32_arg stores its <4 x float> argument %in through the pointer %out
; (align 4) and returns.
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
|
||||
entry:
|
||||
store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v8i8_arg:
|
||||
; FUNC-LABEL: {{^}}v8i8_arg:
|
||||
; EG: VTX_READ_8
|
||||
; EG: VTX_READ_8
|
||||
; EG: VTX_READ_8
|
||||
@ -252,21 +245,20 @@ entry:
|
||||
; EG: VTX_READ_8
|
||||
; EG: VTX_READ_8
|
||||
; EG: VTX_READ_8
|
||||
; SI-LABEL: {{^}}v8i8_arg:
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; v8i8_arg stores its <8 x i8> argument %in through the pointer %out
; (no explicit alignment on the store) and returns.
define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
|
||||
entry:
|
||||
store <8 x i8> %in, <8 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v8i16_arg:
|
||||
; FUNC-LABEL: {{^}}v8i16_arg:
|
||||
; EG: VTX_READ_16
|
||||
; EG: VTX_READ_16
|
||||
; EG: VTX_READ_16
|
||||
@ -275,22 +267,21 @@ entry:
|
||||
; EG: VTX_READ_16
|
||||
; EG: VTX_READ_16
|
||||
; EG: VTX_READ_16
|
||||
; SI-LABEL: {{^}}v8i16_arg:
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; v8i16_arg stores its <8 x i16> argument %in through the pointer %out
; (no explicit alignment on the store) and returns.
define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
|
||||
entry:
|
||||
store <8 x i16> %in, <8 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v8i32_arg:
|
||||
; FUNC-LABEL: {{^}}v8i32_arg:
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
|
||||
@ -299,15 +290,15 @@ entry:
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
|
||||
; SI-LABEL: {{^}}v8i32_arg:
|
||||
; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
|
||||
; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44
|
||||
; v8i32_arg stores its <8 x i32> argument %in through the pointer %out
; (align 4) and returns.
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
|
||||
entry:
|
||||
store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v8f32_arg:
|
||||
; FUNC-LABEL: {{^}}v8f32_arg:
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
|
||||
@ -316,7 +307,6 @@ entry:
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
|
||||
; SI-LABEL: {{^}}v8f32_arg:
|
||||
; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
|
||||
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
|
||||
entry:
|
||||
@ -324,7 +314,7 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v16i8_arg:
|
||||
; FUNC-LABEL: {{^}}v16i8_arg:
|
||||
; EG: VTX_READ_8
|
||||
; EG: VTX_READ_8
|
||||
; EG: VTX_READ_8
|
||||
@ -341,30 +331,29 @@ entry:
|
||||
; EG: VTX_READ_8
|
||||
; EG: VTX_READ_8
|
||||
; EG: VTX_READ_8
|
||||
; SI-LABEL: {{^}}v16i8_arg:
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; SI: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; GCN: buffer_load_ubyte
|
||||
; v16i8_arg stores its <16 x i8> argument %in through the pointer %out
; (no explicit alignment on the store) and returns.
define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
|
||||
entry:
|
||||
store <16 x i8> %in, <16 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v16i16_arg:
|
||||
; FUNC-LABEL: {{^}}v16i16_arg:
|
||||
; EG: VTX_READ_16
|
||||
; EG: VTX_READ_16
|
||||
; EG: VTX_READ_16
|
||||
@ -381,30 +370,29 @@ entry:
|
||||
; EG: VTX_READ_16
|
||||
; EG: VTX_READ_16
|
||||
; EG: VTX_READ_16
|
||||
; SI-LABEL: {{^}}v16i16_arg:
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; SI: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; v16i16_arg stores its <16 x i16> argument %in through the pointer %out
; (no explicit alignment on the store) and returns.
define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
|
||||
entry:
|
||||
store <16 x i16> %in, <16 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v16i32_arg:
|
||||
; FUNC-LABEL: {{^}}v16i32_arg:
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
|
||||
@ -421,15 +409,15 @@ entry:
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
|
||||
; SI-LABEL: {{^}}v16i32_arg:
|
||||
; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
|
||||
; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
|
||||
; v16i32_arg stores its <16 x i32> argument %in through the pointer %out
; (align 4) and returns.
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
|
||||
entry:
|
||||
store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-LABEL: {{^}}v16f32_arg:
|
||||
; FUNC-LABEL: {{^}}v16f32_arg:
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
|
||||
@ -446,8 +434,8 @@ entry:
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
|
||||
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
|
||||
; SI-LABEL: {{^}}v16f32_arg:
|
||||
; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
|
||||
; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
|
||||
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
|
||||
entry:
|
||||
store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
|
||||
@ -455,18 +443,18 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}kernel_arg_i64:
|
||||
; SI: s_load_dwordx2
|
||||
; SI: s_load_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; GCN: s_load_dwordx2
|
||||
; GCN: s_load_dwordx2
|
||||
; GCN: buffer_store_dwordx2
|
||||
; kernel_arg_i64 stores its i64 argument %a through the pointer %out
; (align 8) and returns.
define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
|
||||
store i64 %a, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
|
||||
; XSI: s_load_dwordx2
|
||||
; XSI: s_load_dwordx2
|
||||
; XSI: buffer_store_dwordx2
|
||||
; XGCN: s_load_dwordx2
|
||||
; XGCN: s_load_dwordx2
|
||||
; XGCN: buffer_store_dwordx2
|
||||
; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
|
||||
; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
|
||||
; ret void
|
||||
|
@ -1,25 +1,29 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
|
||||
|
||||
declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone
|
||||
declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone
|
||||
|
||||
; SI-LABEL: {{^}}test_div_fixup_f32:
|
||||
; GCN-LABEL: {{^}}test_div_fixup_f32:
|
||||
; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
|
||||
; SI: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34
|
||||
; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
|
||||
; GCN: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
; test_div_fixup_f32 calls @llvm.AMDGPU.div.fixup.f32 on the float arguments
; %a, %b and %c, stores the returned float through %out (align 4) and returns.
define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
|
||||
%result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
|
||||
store float %result, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_div_fixup_f64:
|
||||
; SI: v_div_fixup_f64
|
||||
; GCN-LABEL: {{^}}test_div_fixup_f64:
|
||||
; GCN: v_div_fixup_f64
|
||||
define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
|
||||
%result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
|
||||
store double %result, double addrspace(1)* %out, align 8
|
||||
|
@ -1,25 +1,29 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone
|
||||
declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone
|
||||
|
||||
; SI-LABEL: {{^}}test_div_fmas_f32:
|
||||
; GCN-LABEL: {{^}}test_div_fmas_f32:
|
||||
; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
|
||||
; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34
|
||||
; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
|
||||
; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
; test_div_fmas_f32 calls @llvm.AMDGPU.div.fmas.f32 on the float arguments
; %a, %b, %c and the i1 argument %d, stores the returned float through %out
; (align 4) and returns.
define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
|
||||
%result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone
|
||||
store float %result, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_div_fmas_f64:
|
||||
; SI: v_div_fmas_f64
|
||||
; GCN-LABEL: {{^}}test_div_fmas_f64:
|
||||
; GCN: v_div_fmas_f64
|
||||
define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind {
|
||||
%result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
|
||||
store double %result, double addrspace(1)* %out, align 8
|
||||
|
@ -1,9 +1,21 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
|
||||
|
||||
declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
|
||||
|
||||
; FUNC-LABEL: {{^}}rsq_clamped_f64:
|
||||
; SI: v_rsq_clamp_f64_e32
|
||||
|
||||
; VI: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[2:3]
|
||||
; TODO: this constant should be folded:
|
||||
; VI: s_mov_b32 s[[ALLBITS:[0-9]+]], -1
|
||||
; VI: s_mov_b32 s[[HIGH1:[0-9]+]], 0x7fefffff
|
||||
; VI: s_mov_b32 s[[LOW1:[0-9]+]], s[[ALLBITS]]
|
||||
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
|
||||
; VI: s_mov_b32 s[[HIGH2:[0-9]+]], 0xffefffff
|
||||
; VI: s_mov_b32 s[[LOW2:[0-9]+]], s[[ALLBITS]]
|
||||
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
|
||||
|
||||
define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
|
||||
%rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone
|
||||
store double %rsq_clamped, double addrspace(1)* %out, align 8
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
|
||||
@ -6,7 +7,15 @@ declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone
|
||||
|
||||
; FUNC-LABEL: {{^}}rsq_clamped_f32:
|
||||
; SI: v_rsq_clamp_f32_e32
|
||||
|
||||
; VI: v_rsq_f32_e32 [[RSQ:v[0-9]+]], {{s[0-9]+}}
|
||||
; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
|
||||
; TODO: this constant should be folded:
|
||||
; VI: v_mov_b32_e32 [[MINFLT:v[0-9]+]], 0xff7fffff
|
||||
; VI: v_max_f32_e32 {{v[0-9]+}}, [[MIN]], [[MINFLT]]
|
||||
|
||||
; EG: RECIPSQRT_CLAMPED
|
||||
|
||||
define void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind {
|
||||
%rsq_clamped = call float @llvm.AMDGPU.rsq.clamped.f32(float %src) nounwind readnone
|
||||
store float %rsq_clamped, float addrspace(1)* %out, align 4
|
||||
|
@ -1,7 +1,9 @@
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s
|
||||
|
||||
;CHECK: v_mbcnt_lo_u32_b32_e64
|
||||
;CHECK: v_mbcnt_hi_u32_b32_e32
|
||||
;GCN: v_mbcnt_lo_u32_b32_e64
|
||||
;SI: v_mbcnt_hi_u32_b32_e32
|
||||
;VI: v_mbcnt_hi_u32_b32_e64
|
||||
|
||||
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
|
||||
main_body:
|
||||
|
@ -1,3 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}round_f32:
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
|
||||
|
||||
; BOTH-LABEL: {{^}}local_i32_load
|
||||
; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 [M0]
|
||||
|
@ -1,15 +1,16 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
|
||||
; EG: LDS_WRXCHG_RET *
|
||||
; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
|
||||
; SI: s_load_dword [[SPTR:s[0-9]+]],
|
||||
; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
|
||||
; GCN: s_load_dword [[SPTR:s[0-9]+]],
|
||||
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
@ -18,8 +19,8 @@ define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
|
||||
; EG: LDS_WRXCHG_RET *
|
||||
; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -30,12 +31,12 @@ define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
|
||||
; XXX - Is it really necessary to load 4 into VGPR?
|
||||
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
|
||||
; EG: LDS_ADD_RET *
|
||||
; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
|
||||
; SI: s_load_dword [[SPTR:s[0-9]+]],
|
||||
; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; SI: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
|
||||
; GCN: s_load_dword [[SPTR:s[0-9]+]],
|
||||
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
@ -44,8 +45,8 @@ define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
|
||||
; EG: LDS_ADD_RET *
|
||||
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -56,8 +57,8 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
|
||||
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
|
||||
; EG: LDS_ADD_RET *
|
||||
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
|
||||
; CI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
|
||||
%sub = sub i32 %a, %b
|
||||
%add = add i32 %sub, 4
|
||||
@ -69,9 +70,9 @@ define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 ad
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32:
|
||||
; EG: LDS_ADD_RET *
|
||||
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
@ -80,9 +81,9 @@ define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
|
||||
; EG: LDS_ADD_RET *
|
||||
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
|
||||
@ -93,8 +94,8 @@ define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
|
||||
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_bad_si_offset:
|
||||
; EG: LDS_ADD_RET *
|
||||
; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
|
||||
; CI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; CIVI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
|
||||
%sub = sub i32 %a, %b
|
||||
%add = add i32 %sub, 4
|
||||
@ -106,8 +107,8 @@ define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 ad
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
|
||||
; EG: LDS_SUB_RET *
|
||||
; SI: ds_sub_rtn_u32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_sub_rtn_u32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
@ -116,8 +117,8 @@ define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
|
||||
; EG: LDS_SUB_RET *
|
||||
; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -127,9 +128,9 @@ define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32:
|
||||
; EG: LDS_SUB_RET *
|
||||
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
@ -138,9 +139,9 @@ define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
|
||||
; EG: LDS_SUB_RET *
|
||||
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
|
||||
@ -150,8 +151,8 @@ define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
|
||||
; EG: LDS_AND_RET *
|
||||
; SI: ds_and_rtn_b32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_and_rtn_b32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
@ -160,8 +161,8 @@ define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
|
||||
; EG: LDS_AND_RET *
|
||||
; SI: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -171,8 +172,8 @@ define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
|
||||
; EG: LDS_OR_RET *
|
||||
; SI: ds_or_rtn_b32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_or_rtn_b32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
@ -181,8 +182,8 @@ define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %pt
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
|
||||
; EG: LDS_OR_RET *
|
||||
; SI: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -192,8 +193,8 @@ define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
|
||||
; EG: LDS_XOR_RET *
|
||||
; SI: ds_xor_rtn_b32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_xor_rtn_b32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
@ -202,8 +203,8 @@ define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
|
||||
; EG: LDS_XOR_RET *
|
||||
; SI: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -221,8 +222,8 @@ define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
|
||||
; EG: LDS_MIN_INT_RET *
|
||||
; SI: ds_min_rtn_i32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_rtn_i32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
@ -231,8 +232,8 @@ define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
|
||||
; EG: LDS_MIN_INT_RET *
|
||||
; SI: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -242,8 +243,8 @@ define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
|
||||
; EG: LDS_MAX_INT_RET *
|
||||
; SI: ds_max_rtn_i32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_rtn_i32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
@ -252,8 +253,8 @@ define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
|
||||
; EG: LDS_MAX_INT_RET *
|
||||
; SI: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -263,8 +264,8 @@ define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
|
||||
; EG: LDS_MIN_UINT_RET *
|
||||
; SI: ds_min_rtn_u32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_rtn_u32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
@ -273,8 +274,8 @@ define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
|
||||
; EG: LDS_MIN_UINT_RET *
|
||||
; SI: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -284,8 +285,8 @@ define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
|
||||
; EG: LDS_MAX_UINT_RET *
|
||||
; SI: ds_max_rtn_u32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_rtn_u32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
@ -294,8 +295,8 @@ define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
|
||||
; EG: LDS_MAX_UINT_RET *
|
||||
; SI: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -304,19 +305,19 @@ define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
|
||||
; SI: s_load_dword [[SPTR:s[0-9]+]],
|
||||
; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
|
||||
; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
|
||||
; SI: s_endpgm
|
||||
; GCN: s_load_dword [[SPTR:s[0-9]+]],
|
||||
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
|
||||
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
|
||||
; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -325,19 +326,19 @@ define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
|
||||
; XXX - Is it really necessary to load 4 into VGPR?
|
||||
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
|
||||
; SI: s_load_dword [[SPTR:s[0-9]+]],
|
||||
; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
|
||||
; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; SI: ds_add_u32 [[VPTR]], [[DATA]] [M0]
|
||||
; SI: s_endpgm
|
||||
; GCN: s_load_dword [[SPTR:s[0-9]+]],
|
||||
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
|
||||
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; GCN: ds_add_u32 [[VPTR]], [[DATA]] [M0]
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
|
||||
; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -346,8 +347,8 @@ define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset
|
||||
; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} [M0]
|
||||
; CI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
|
||||
; SI: s_endpgm
|
||||
; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
|
||||
%sub = sub i32 %a, %b
|
||||
%add = add i32 %sub, 4
|
||||
@ -357,18 +358,18 @@ define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32:
|
||||
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] [M0]
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] [M0]
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
|
||||
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
|
||||
@ -377,8 +378,8 @@ define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_bad_si_offset:
|
||||
; SI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; CI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; CIVI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
|
||||
%sub = sub i32 %a, %b
|
||||
%add = add i32 %sub, 4
|
||||
@ -388,16 +389,16 @@ define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
|
||||
; SI: ds_sub_u32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_sub_u32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
|
||||
; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -405,18 +406,18 @@ define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32:
|
||||
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]]
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]]
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
|
||||
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
|
||||
; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
|
||||
@ -424,16 +425,16 @@ define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
|
||||
; SI: ds_and_b32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_and_b32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
|
||||
; SI: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -441,16 +442,16 @@ define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
|
||||
; SI: ds_or_b32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_or_b32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
|
||||
; SI: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -458,16 +459,16 @@ define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
|
||||
; SI: ds_xor_b32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_xor_b32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
|
||||
; SI: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -482,16 +483,16 @@ define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
; }
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
|
||||
; SI: ds_min_i32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_i32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
|
||||
; SI: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -499,16 +500,16 @@ define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
|
||||
; SI: ds_max_i32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_i32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
|
||||
; SI: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -516,16 +517,16 @@ define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
|
||||
; SI: ds_min_u32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_u32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
|
||||
; SI: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
@ -533,16 +534,16 @@ define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
|
||||
; SI: ds_max_u32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_u32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
|
||||
; SI: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
|
@ -1,8 +1,9 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64:
|
||||
; SI: ds_wrxchg_rtn_b64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_wrxchg_rtn_b64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
@ -10,8 +11,8 @@ define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
|
||||
; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -20,8 +21,8 @@ define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64:
|
||||
; SI: ds_add_rtn_u64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_add_rtn_u64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
@ -29,13 +30,14 @@ define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
|
||||
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
|
||||
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
|
||||
; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
|
||||
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
|
||||
; GCN: buffer_store_dwordx2 [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i64 4
|
||||
%result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
|
||||
@ -44,11 +46,11 @@ define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64:
|
||||
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
|
||||
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
|
||||
; SI: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
|
||||
; SI: buffer_store_dwordx2 [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
|
||||
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
|
||||
; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
|
||||
; GCN: buffer_store_dwordx2 [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
|
||||
store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
@ -56,8 +58,8 @@ define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
|
||||
; SI: ds_inc_rtn_u64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_inc_rtn_u64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
|
||||
@ -66,8 +68,8 @@ define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64:
|
||||
; SI: ds_sub_rtn_u64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_sub_rtn_u64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
@ -75,8 +77,8 @@ define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
|
||||
; SI: ds_sub_rtn_u64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_sub_rtn_u64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -85,11 +87,11 @@ define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64:
|
||||
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
|
||||
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
|
||||
; SI: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
|
||||
; SI: buffer_store_dwordx2 [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
|
||||
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
|
||||
; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
|
||||
; GCN: buffer_store_dwordx2 [[RESULT]],
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
|
||||
store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
@ -97,8 +99,8 @@ define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
|
||||
; SI: ds_dec_rtn_u64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_dec_rtn_u64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
|
||||
@ -107,8 +109,8 @@ define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64:
|
||||
; SI: ds_and_rtn_b64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_and_rtn_b64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
@ -116,8 +118,8 @@ define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
|
||||
; SI: ds_and_rtn_b64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_and_rtn_b64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -126,8 +128,8 @@ define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64:
|
||||
; SI: ds_or_rtn_b64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_or_rtn_b64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
@ -135,8 +137,8 @@ define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %pt
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
|
||||
; SI: ds_or_rtn_b64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_or_rtn_b64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -145,8 +147,8 @@ define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64:
|
||||
; SI: ds_xor_rtn_b64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_xor_rtn_b64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
@ -154,8 +156,8 @@ define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
|
||||
; SI: ds_xor_rtn_b64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_xor_rtn_b64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -172,8 +174,8 @@ define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
|
||||
; }
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64:
|
||||
; SI: ds_min_rtn_i64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_rtn_i64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
@ -181,8 +183,8 @@ define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
|
||||
; SI: ds_min_rtn_i64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_rtn_i64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -191,8 +193,8 @@ define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64:
|
||||
; SI: ds_max_rtn_i64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_rtn_i64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
@ -200,8 +202,8 @@ define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
|
||||
; SI: ds_max_rtn_i64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_rtn_i64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -210,8 +212,8 @@ define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64:
|
||||
; SI: ds_min_rtn_u64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_rtn_u64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
@ -219,8 +221,8 @@ define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
|
||||
; SI: ds_min_rtn_u64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_rtn_u64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -229,8 +231,8 @@ define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64:
|
||||
; SI: ds_max_rtn_u64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_rtn_u64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
@ -238,8 +240,8 @@ define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
|
||||
; SI: ds_max_rtn_u64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_rtn_u64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -248,16 +250,16 @@ define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64:
|
||||
; SI: ds_wrxchg_rtn_b64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_wrxchg_rtn_b64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
|
||||
; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -265,8 +267,8 @@ define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64:
|
||||
; SI: ds_add_u64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_add_u64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
ret void
|
||||
@ -274,11 +276,12 @@ define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
|
||||
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
|
||||
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
|
||||
; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
|
||||
; SI: s_endpgm
|
||||
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
|
||||
; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
|
||||
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
|
||||
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i64 4
|
||||
%result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
|
||||
@ -286,18 +289,18 @@ define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64:
|
||||
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
|
||||
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
|
||||
; SI: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
|
||||
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
|
||||
; GCN: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
|
||||
; SI: ds_inc_u64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_inc_u64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
|
||||
@ -305,16 +308,16 @@ define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64:
|
||||
; SI: ds_sub_u64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_sub_u64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
|
||||
; SI: ds_sub_u64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_sub_u64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -322,18 +325,18 @@ define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64:
|
||||
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
|
||||
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
|
||||
; SI: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
|
||||
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
|
||||
; GCN: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
|
||||
; SI: ds_dec_u64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_dec_u64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
|
||||
@ -341,16 +344,16 @@ define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64:
|
||||
; SI: ds_and_b64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_and_b64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
|
||||
; SI: ds_and_b64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_and_b64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -358,16 +361,16 @@ define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64:
|
||||
; SI: ds_or_b64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_or_b64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
|
||||
; SI: ds_or_b64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_or_b64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -375,16 +378,16 @@ define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64:
|
||||
; SI: ds_xor_b64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_xor_b64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
|
||||
; SI: ds_xor_b64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_xor_b64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -399,16 +402,16 @@ define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
; }
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64:
|
||||
; SI: ds_min_i64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_i64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
|
||||
; SI: ds_min_i64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_i64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -416,16 +419,16 @@ define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64:
|
||||
; SI: ds_max_i64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_i64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
|
||||
; SI: ds_max_i64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_i64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -433,16 +436,16 @@ define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64:
|
||||
; SI: ds_min_u64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_u64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
|
||||
; SI: ds_min_u64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_min_u64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
@ -450,16 +453,16 @@ define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64:
|
||||
; SI: ds_max_u64
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_u64
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
|
||||
; SI: ds_max_u64 {{.*}} offset:32
|
||||
; SI: s_endpgm
|
||||
; GCN: ds_max_u64 {{.*}} offset:32
|
||||
; GCN: s_endpgm
|
||||
define void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
|
||||
|
@ -1,13 +1,16 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s
|
||||
|
||||
; Make sure there isn't an extra space between the instruction name and first operands.
|
||||
|
||||
; SI-LABEL: {{^}}add_f32:
|
||||
; GCN-LABEL: {{^}}add_f32:
|
||||
; SI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]]
|
||||
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; VI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; GCN: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]]
|
||||
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
define void @add_f32(float addrspace(1)* %out, float %a, float %b) {
|
||||
%result = fadd float %a, %b
|
||||
store float %result, float addrspace(1)* %out
|
||||
|
@ -1,6 +1,8 @@
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
|
||||
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
|
||||
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
|
||||
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
|
||||
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
|
||||
|
@ -1,10 +1,18 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=VI %s
|
||||
|
||||
; FUNC-LABEL: {{^}}cluster_arg_loads:
|
||||
; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x9
|
||||
; SI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xe
|
||||
; VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x24
|
||||
; VI-NEXT: s_nop 0
|
||||
; VI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI-NEXT: s_nop 0
|
||||
; VI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
|
||||
; VI-NEXT: s_nop 0
|
||||
; VI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x38
|
||||
define void @cluster_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) nounwind {
|
||||
store i32 %x, i32 addrspace(1)* %out0, align 4
|
||||
store i32 %y, i32 addrspace(1)* %out1, align 4
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s
|
||||
|
||||
; When a frame index offset is more than 12-bits, make sure we don't store
|
||||
; it in mubuf's offset field.
|
||||
|
@ -1,4 +1,5 @@
|
||||
;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
||||
;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s
|
||||
;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s
|
||||
;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
|
||||
|
||||
;FUNC-LABEL: {{^}}test_sdiv:
|
||||
@ -35,39 +36,40 @@
|
||||
;EG: BFE_UINT
|
||||
;EG: BFE_UINT
|
||||
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI: v_bfe_u32
|
||||
;SI-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_64
|
||||
;SI: s_endpgm
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN: v_bfe_u32
|
||||
;GCN-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_b64
|
||||
;VI-NOT: v_lshrrev_b64
|
||||
;GCN: s_endpgm
|
||||
define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
%result = sdiv i64 %x, %y
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
@ -108,39 +110,40 @@ define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
;EG: BFE_UINT
|
||||
;EG: AND_INT {{.*}}, 1,
|
||||
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_64
|
||||
;SI: s_endpgm
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_b64
|
||||
;VI-NOT: v_lshrrev_b64
|
||||
;GCN: s_endpgm
|
||||
define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
%result = urem i64 %x, %y
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
@ -151,10 +154,11 @@ define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
;EG: RECIP_UINT
|
||||
;EG-NOT: BFE_UINT
|
||||
|
||||
;SI-NOT: s_bfe_u32
|
||||
;SI-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_64
|
||||
;SI: s_endpgm
|
||||
;GCN-NOT: s_bfe_u32
|
||||
;GCN-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_b64
|
||||
;VI-NOT: v_lshrrev_b64
|
||||
;GCN: s_endpgm
|
||||
define void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
%1 = ashr i64 %x, 33
|
||||
%2 = ashr i64 %y, 33
|
||||
@ -167,10 +171,11 @@ define void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
;EG: RECIP_UINT
|
||||
;EG-NOT: BFE_UINT
|
||||
|
||||
;SI-NOT: s_bfe_u32
|
||||
;SI-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_64
|
||||
;SI: s_endpgm
|
||||
;GCN-NOT: s_bfe_u32
|
||||
;GCN-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_b64
|
||||
;VI-NOT: v_lshrrev_b64
|
||||
;GCN: s_endpgm
|
||||
define void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
%1 = ashr i64 %x, 33
|
||||
%2 = ashr i64 %y, 33
|
||||
@ -186,10 +191,11 @@ define void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
;EG-NOT: RECIP_UINT
|
||||
;EG-NOT: BFE_UINT
|
||||
|
||||
;SI-NOT: s_bfe_u32
|
||||
;SI: v_mad_f32
|
||||
;SI-NOT: v_lshr_64
|
||||
;SI: s_endpgm
|
||||
;GCN-NOT: s_bfe_u32
|
||||
;GCN: v_mad_f32
|
||||
;SI-NOT: v_lshr_b64
|
||||
;VI-NOT: v_lshrrev_b64
|
||||
;GCN: s_endpgm
|
||||
define void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
%1 = ashr i64 %x, 40
|
||||
%2 = ashr i64 %y, 40
|
||||
@ -205,10 +211,11 @@ define void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
;EG-NOT: RECIP_UINT
|
||||
;EG-NOT: BFE_UINT
|
||||
|
||||
;SI-NOT: s_bfe_u32
|
||||
;SI: v_mad_f32
|
||||
;SI-NOT: v_lshr_64
|
||||
;SI: s_endpgm
|
||||
;GCN-NOT: s_bfe_u32
|
||||
;GCN: v_mad_f32
|
||||
;SI-NOT: v_lshr_b64
|
||||
;VI-NOT: v_lshrrev_b64
|
||||
;GCN: s_endpgm
|
||||
define void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
%1 = ashr i64 %x, 40
|
||||
%2 = ashr i64 %y, 40
|
||||
|
@ -1,12 +1,13 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
|
||||
; SI-NOT: v_cmp
|
||||
; SI: v_cmp_ne_i32_e32 vcc,
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; SI-NEXT:buffer_store_byte [[RESULT]]
|
||||
; SI-NEXT: s_endpgm
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: v_cmp_ne_i32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
|
||||
; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
|
||||
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
|
||||
@ -19,11 +20,11 @@ define void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
|
||||
; SI-NOT: v_cmp
|
||||
; SI: v_cmp_ne_i32_e32 vcc,
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; SI-NEXT: s_endpgm
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: v_cmp_ne_i32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
|
||||
; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
|
||||
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
|
||||
@ -37,12 +38,12 @@ define void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
|
||||
; This really folds away to false
|
||||
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
|
||||
; SI: v_cmp_eq_i32_e32 vcc,
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
|
||||
; SI-NEXT: v_cmp_eq_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
|
||||
; SI-NEXT: buffer_store_byte [[TMP]]
|
||||
; SI-NEXT: s_endpgm
|
||||
; GCN: v_cmp_eq_i32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
|
||||
; GCN-NEXT: v_cmp_eq_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
|
||||
; GCN-NEXT: buffer_store_byte [[TMP]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
|
||||
%icmp0 = icmp eq i32 %a, %b
|
||||
%ext = sext i1 %icmp0 to i32
|
||||
@ -53,12 +54,12 @@ define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
|
||||
; This really folds away to true
|
||||
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
|
||||
; SI: v_cmp_ne_i32_e32 vcc,
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
|
||||
; SI-NEXT: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
|
||||
; SI-NEXT: buffer_store_byte [[TMP]]
|
||||
; SI-NEXT: s_endpgm
|
||||
; GCN: v_cmp_ne_i32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
|
||||
; GCN-NEXT: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
|
||||
; GCN-NEXT: buffer_store_byte [[TMP]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
|
||||
%icmp0 = icmp ne i32 %a, %b
|
||||
%ext = sext i1 %icmp0 to i32
|
||||
@ -68,11 +69,11 @@ define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
|
||||
; SI-NOT: v_cmp
|
||||
; SI: v_cmp_ne_i32_e32 vcc,
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; SI-NEXT: s_endpgm
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: v_cmp_ne_i32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
|
||||
%icmp0 = icmp eq i32 %a, %b
|
||||
%ext = zext i1 %icmp0 to i32
|
||||
@ -82,11 +83,11 @@ define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
|
||||
; SI-NOT: v_cmp
|
||||
; SI: v_cmp_ne_i32_e32 vcc,
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; SI-NEXT: s_endpgm
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: v_cmp_ne_i32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
|
||||
%icmp0 = icmp ne i32 %a, %b
|
||||
%ext = zext i1 %icmp0 to i32
|
||||
@ -96,11 +97,11 @@ define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
|
||||
; SI-NOT: v_cmp
|
||||
; SI: v_cmp_eq_i32_e32 vcc,
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; SI-NEXT: s_endpgm
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: v_cmp_eq_i32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
|
||||
%icmp0 = icmp eq i32 %a, %b
|
||||
%ext = zext i1 %icmp0 to i32
|
||||
@ -110,10 +111,10 @@ define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
|
||||
; SI-NOT: v_cmp
|
||||
; SI: v_cmp_eq_i32_e32 vcc,
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN-NOT: v_cmp
|
||||
; GCN: v_cmp_eq_i32_e32 vcc,
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
|
||||
%icmp0 = icmp ne i32 %a, %b
|
||||
%ext = zext i1 %icmp0 to i32
|
||||
@ -125,11 +126,13 @@ define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
|
||||
; SI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
|
||||
; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[VB]], 2{{$}}
|
||||
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
|
||||
; SI: buffer_store_byte
|
||||
; SI: s_endpgm
|
||||
; VI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
|
||||
; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[VB]], 2{{$}}
|
||||
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
|
||||
; GCN: buffer_store_byte
|
||||
; GCN: s_endpgm
|
||||
define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
|
||||
%icmp0 = icmp ne i32 %a, %b
|
||||
%ext = sext i1 %icmp0 to i32
|
||||
@ -139,12 +142,12 @@ define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
|
||||
; SI: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
|
||||
; SI: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}}
|
||||
; SI: v_cmp_ne_i32_e32 vcc, [[B]], [[K255]]
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
; GCN: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
|
||||
; GCN: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}}
|
||||
; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[K255]]
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN: s_endpgm
|
||||
define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
|
||||
%b.ext = zext i8 %b to i32
|
||||
%icmp0 = icmp ne i32 %b.ext, 255
|
||||
@ -153,11 +156,11 @@ define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
|
||||
; SI: buffer_load_sbyte [[B:v[0-9]+]]
|
||||
; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
; GCN: buffer_load_sbyte [[B:v[0-9]+]]
|
||||
; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN: s_endpgm
|
||||
define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
|
||||
%b = load i8 addrspace(1)* %b.ptr
|
||||
%b.ext = sext i8 %b to i32
|
||||
@ -167,11 +170,11 @@ define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nou
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg:
|
||||
; SI: s_load_dword [[B:s[0-9]+]]
|
||||
; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
; GCN: s_load_dword [[B:s[0-9]+]]
|
||||
; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN: s_endpgm
|
||||
define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) nounwind {
|
||||
%b.ext = sext i8 %b to i32
|
||||
%icmp0 = icmp ne i32 %b.ext, -1
|
||||
@ -184,12 +187,12 @@ define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) n
|
||||
; Should do a buffer_load_sbyte and compare with -1
|
||||
|
||||
; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
|
||||
; SI-DAG: buffer_load_ubyte [[B:v[0-9]+]]
|
||||
; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}}
|
||||
; SI: v_cmp_ne_i32_e32 vcc, [[B]], [[K]]{{$}}
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
; GCN-DAG: buffer_load_ubyte [[B:v[0-9]+]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}}
|
||||
; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[K]]{{$}}
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN: s_endpgm
|
||||
define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
|
||||
%b.ext = sext i8 %b to i32
|
||||
%icmp0 = icmp ne i32 %b.ext, -1
|
||||
@ -198,9 +201,9 @@ define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
|
||||
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
|
||||
; SI: buffer_store_byte [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
|
||||
; GCN: buffer_store_byte [[RESULT]]
|
||||
; GCN: s_endpgm
|
||||
define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
|
||||
%b.ext = zext i8 %b to i32
|
||||
%icmp0 = icmp ne i32 %b.ext, -1
|
||||
@ -209,9 +212,9 @@ define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
|
||||
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
|
||||
; SI: buffer_store_byte [[RESULT]]
|
||||
; SI-NEXT: s_endpgm
|
||||
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
|
||||
; GCN: buffer_store_byte [[RESULT]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
|
||||
%icmp0 = icmp ne i32 %a, %b
|
||||
%ext = zext i1 %icmp0 to i32
|
||||
@ -221,9 +224,9 @@ define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
|
||||
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
|
||||
; SI: buffer_store_byte [[RESULT]]
|
||||
; SI-NEXT: s_endpgm
|
||||
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_byte [[RESULT]]
|
||||
; GCN-NEXT: s_endpgm
|
||||
define void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
|
||||
%icmp0 = icmp ne i32 %a, %b
|
||||
%ext = zext i1 %icmp0 to i32
|
||||
|
@ -1,8 +1,10 @@
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s
|
||||
|
||||
; SMRD load with an immediate offset.
|
||||
; CHECK-LABEL: {{^}}smrd0:
|
||||
; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
|
||||
; GCN-LABEL: {{^}}smrd0:
|
||||
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
|
||||
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
|
||||
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
entry:
|
||||
%0 = getelementptr i32 addrspace(2)* %ptr, i64 1
|
||||
@ -12,8 +14,9 @@ entry:
|
||||
}
|
||||
|
||||
; SMRD load with the largest possible immediate offset.
|
||||
; CHECK-LABEL: {{^}}smrd1:
|
||||
; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
|
||||
; GCN-LABEL: {{^}}smrd1:
|
||||
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
|
||||
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
|
||||
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
entry:
|
||||
%0 = getelementptr i32 addrspace(2)* %ptr, i64 255
|
||||
@ -23,10 +26,11 @@ entry:
|
||||
}
|
||||
|
||||
; SMRD load with an offset greater than the largest possible immediate.
|
||||
; CHECK-LABEL: {{^}}smrd2:
|
||||
; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
|
||||
; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
|
||||
; CHECK: s_endpgm
|
||||
; GCN-LABEL: {{^}}smrd2:
|
||||
; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
|
||||
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
|
||||
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
|
||||
; GCN: s_endpgm
|
||||
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
entry:
|
||||
%0 = getelementptr i32 addrspace(2)* %ptr, i64 256
|
||||
@ -36,17 +40,18 @@ entry:
|
||||
}
|
||||
|
||||
; SMRD load with a 64-bit offset
|
||||
; CHECK-LABEL: {{^}}smrd3:
|
||||
; GCN-LABEL: {{^}}smrd3:
|
||||
; FIXME: There are too many copies here because we don't fold immediates
|
||||
; through REG_SEQUENCE
|
||||
; CHECK: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
|
||||
; CHECK: s_mov_b32 s[[SHI:[0-9]+]], 4
|
||||
; CHECK: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]]
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]]
|
||||
; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
|
||||
; SI: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
|
||||
; SI: s_mov_b32 s[[SHI:[0-9]+]], 4
|
||||
; SI: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
|
||||
; FIXME: We should be able to use s_load_dword here
|
||||
; CHECK: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
|
||||
; CHECK: s_endpgm
|
||||
; SI: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
|
||||
; TODO: Add VI checks
|
||||
; GCN: s_endpgm
|
||||
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||
entry:
|
||||
%0 = getelementptr i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
|
||||
@ -56,8 +61,9 @@ entry:
|
||||
}
|
||||
|
||||
; SMRD load using the load.const intrinsic with an immediate offset
|
||||
; CHECK-LABEL: {{^}}smrd_load_const0:
|
||||
; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
|
||||
; GCN-LABEL: {{^}}smrd_load_const0:
|
||||
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
|
||||
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
|
||||
define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
|
||||
main_body:
|
||||
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
|
||||
@ -69,8 +75,9 @@ main_body:
|
||||
|
||||
; SMRD load using the load.const intrinsic with the largest possible immediate
|
||||
; offset.
|
||||
; CHECK-LABEL: {{^}}smrd_load_const1:
|
||||
; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
|
||||
; GCN-LABEL: {{^}}smrd_load_const1:
|
||||
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
|
||||
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
|
||||
define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
|
||||
main_body:
|
||||
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
|
||||
@ -82,9 +89,10 @@ main_body:
|
||||
; SMRD load using the load.const intrinsic with an offset greater than the
|
||||
; largest possible
|
||||
; immediate offset.
|
||||
; CHECK-LABEL: {{^}}smrd_load_const2:
|
||||
; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
|
||||
; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
|
||||
; GCN-LABEL: {{^}}smrd_load_const2:
|
||||
; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
|
||||
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
|
||||
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
|
||||
define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
|
||||
main_body:
|
||||
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
|
||||
|
@ -1,8 +1,10 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}lshr_i32:
|
||||
; SI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
|
||||
@ -17,6 +19,9 @@ define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
|
||||
@ -34,6 +39,11 @@ define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
|
||||
; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
@ -49,6 +59,7 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
|
||||
|
||||
; FUNC-LABEL: {{^}}lshr_i64:
|
||||
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
|
||||
; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
|
||||
; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
|
||||
@ -74,6 +85,9 @@ define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
|
||||
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
|
||||
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
|
||||
; EG-DAG: LSHL {{\*? *}}[[COMPSHA]]
|
||||
@ -111,6 +125,11 @@ define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
|
||||
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
|
||||
|
||||
; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
|
||||
|
||||
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
|
||||
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
|
||||
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
|
||||
|
@ -1,5 +1,5 @@
|
||||
;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
||||
;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
|
||||
;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s
|
||||
;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s
|
||||
;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
|
||||
|
||||
;FUNC-LABEL: {{^}}test_udiv:
|
||||
@ -36,39 +36,40 @@
|
||||
;EG: BFE_UINT
|
||||
;EG: BFE_UINT
|
||||
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_64
|
||||
;SI: s_endpgm
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_b64
|
||||
;VI-NOT: v_lshrrev_b64
|
||||
;GCN: s_endpgm
|
||||
define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
%result = udiv i64 %x, %y
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
@ -109,39 +110,40 @@ define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
;EG: BFE_UINT
|
||||
;EG: AND_INT {{.*}}, 1,
|
||||
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI: s_bfe_u32
|
||||
;SI-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_64
|
||||
;SI: s_endpgm
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN: s_bfe_u32
|
||||
;GCN-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_b64
|
||||
;VI-NOT: v_lshrrev_b64
|
||||
;GCN: s_endpgm
|
||||
define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
%result = urem i64 %x, %y
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
@ -152,10 +154,11 @@ define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
;EG: RECIP_UINT
|
||||
;EG-NOT: BFE_UINT
|
||||
|
||||
;SI-NOT: s_bfe_u32
|
||||
;SI-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_64
|
||||
;SI: s_endpgm
|
||||
;GCN-NOT: s_bfe_u32
|
||||
;GCN-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_b64
|
||||
;VI-NOT: v_lshrrev_b64
|
||||
;GCN: s_endpgm
|
||||
define void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
%1 = lshr i64 %x, 33
|
||||
%2 = lshr i64 %y, 33
|
||||
@ -168,10 +171,11 @@ define void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
;EG: RECIP_UINT
|
||||
;EG-NOT: BFE_UINT
|
||||
|
||||
;SI-NOT: s_bfe_u32
|
||||
;SI-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_64
|
||||
;SI: s_endpgm
|
||||
;GCN-NOT: s_bfe_u32
|
||||
;GCN-NOT: v_mad_f32
|
||||
;SI-NOT: v_lshr_b64
|
||||
;VI-NOT: v_lshrrev_b64
|
||||
;GCN: s_endpgm
|
||||
define void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
%1 = lshr i64 %x, 33
|
||||
%2 = lshr i64 %y, 33
|
||||
@ -187,9 +191,10 @@ define void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
;EG-NOT: RECIP_UINT
|
||||
;EG-NOT: BFE_UINT
|
||||
|
||||
;SI-NOT: v_lshr_64
|
||||
;SI: v_mad_f32
|
||||
;SI: s_endpgm
|
||||
;SI-NOT: v_lshr_b64
|
||||
;VI-NOT: v_lshrrev_b64
|
||||
;GCN: v_mad_f32
|
||||
;GCN: s_endpgm
|
||||
define void @test_udiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
%1 = lshr i64 %x, 40
|
||||
%2 = lshr i64 %y, 40
|
||||
@ -205,9 +210,10 @@ define void @test_udiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
;EG-NOT: RECIP_UINT
|
||||
;EG-NOT: BFE_UINT
|
||||
|
||||
;SI-NOT: v_lshr_64
|
||||
;SI: v_mad_f32
|
||||
;SI: s_endpgm
|
||||
;SI-NOT: v_lshr_b64
|
||||
;VI-NOT: v_lshrrev_b64
|
||||
;GCN: v_mad_f32
|
||||
;GCN: s_endpgm
|
||||
define void @test_urem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
|
||||
%1 = lshr i64 %x, 40
|
||||
%2 = lshr i64 %y, 40
|
||||
|
@ -1,80 +1,87 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
|
||||
|
||||
declare float @llvm.fma.f32(float, float, float) #1
|
||||
declare float @llvm.fmuladd.f32(float, float, float) #1
|
||||
declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1
|
||||
|
||||
|
||||
; SI-LABEL: {{^}}test_sgpr_use_twice_binop:
|
||||
; SI: s_load_dword [[SGPR:s[0-9]+]],
|
||||
; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
|
||||
; GCN: s_load_dword [[SGPR:s[0-9]+]],
|
||||
; GCN: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
|
||||
%dbl = fadd float %a, %a
|
||||
store float %dbl, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_sgpr_use_three_ternary_op:
|
||||
; SI: s_load_dword [[SGPR:s[0-9]+]],
|
||||
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; GCN-LABEL: {{^}}test_sgpr_use_three_ternary_op:
|
||||
; GCN: s_load_dword [[SGPR:s[0-9]+]],
|
||||
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
|
||||
%fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1
|
||||
store float %fma, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
|
||||
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
|
||||
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
||||
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
||||
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
|
||||
%fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1
|
||||
store float %fma, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
|
||||
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
|
||||
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
||||
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
||||
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
|
||||
%fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
|
||||
store float %fma, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
|
||||
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
|
||||
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
||||
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
||||
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
|
||||
%fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
|
||||
store float %fma, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
|
||||
; SI: s_load_dword [[SGPR:s[0-9]+]]
|
||||
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
|
||||
; GCN: s_load_dword [[SGPR:s[0-9]+]]
|
||||
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
|
||||
%fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
|
||||
store float %fma, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
|
||||
; SI: s_load_dword [[SGPR:s[0-9]+]]
|
||||
; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
|
||||
; GCN: s_load_dword [[SGPR:s[0-9]+]]
|
||||
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
|
||||
%fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
|
||||
store float %fma, float addrspace(1)* %out, align 4
|
||||
@ -82,10 +89,10 @@ define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, fl
|
||||
}
|
||||
|
||||
; Don't use fma since fma c, x, y is canonicalized to fma x, c, y
|
||||
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
|
||||
; SI: s_load_dword [[SGPR:s[0-9]+]]
|
||||
; SI: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
|
||||
; GCN: s_load_dword [[SGPR:s[0-9]+]]
|
||||
; GCN: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
|
||||
%fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1
|
||||
store i32 %fma, i32 addrspace(1)* %out, align 4
|
||||
|
@ -1,14 +1,15 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
|
||||
; FUNC-LABEL: {{^}}ngroups_x:
|
||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
|
||||
; EG: MOV [[VAL]], KC0[0].X
|
||||
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
|
||||
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; SI: buffer_store_dword [[VVAL]]
|
||||
; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
define void @ngroups_x (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.ngroups.x() #0
|
||||
@ -21,8 +22,9 @@ entry:
|
||||
; EG: MOV [[VAL]], KC0[0].Y
|
||||
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
|
||||
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; SI: buffer_store_dword [[VVAL]]
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
define void @ngroups_y (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.ngroups.y() #0
|
||||
@ -35,8 +37,9 @@ entry:
|
||||
; EG: MOV [[VAL]], KC0[0].Z
|
||||
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
|
||||
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; SI: buffer_store_dword [[VVAL]]
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
define void @ngroups_z (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.ngroups.z() #0
|
||||
@ -49,8 +52,9 @@ entry:
|
||||
; EG: MOV [[VAL]], KC0[0].W
|
||||
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
|
||||
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; SI: buffer_store_dword [[VVAL]]
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
define void @global_size_x (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.global.size.x() #0
|
||||
@ -63,8 +67,9 @@ entry:
|
||||
; EG: MOV [[VAL]], KC0[1].X
|
||||
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
|
||||
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; SI: buffer_store_dword [[VVAL]]
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
define void @global_size_y (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.global.size.y() #0
|
||||
@ -77,8 +82,9 @@ entry:
|
||||
; EG: MOV [[VAL]], KC0[1].Y
|
||||
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
|
||||
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; SI: buffer_store_dword [[VVAL]]
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
define void @global_size_z (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.global.size.z() #0
|
||||
@ -91,8 +97,9 @@ entry:
|
||||
; EG: MOV [[VAL]], KC0[1].Z
|
||||
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
|
||||
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; SI: buffer_store_dword [[VVAL]]
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
define void @local_size_x (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.local.size.x() #0
|
||||
@ -105,8 +112,9 @@ entry:
|
||||
; EG: MOV [[VAL]], KC0[1].W
|
||||
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
|
||||
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; SI: buffer_store_dword [[VVAL]]
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
define void @local_size_y (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.local.size.y() #0
|
||||
@ -119,8 +127,9 @@ entry:
|
||||
; EG: MOV [[VAL]], KC0[2].X
|
||||
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
|
||||
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; SI: buffer_store_dword [[VVAL]]
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
define void @local_size_z (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.local.size.z() #0
|
||||
@ -133,8 +142,9 @@ entry:
|
||||
; EG: MOV [[VAL]], KC0[2].Z
|
||||
|
||||
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
|
||||
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; SI: buffer_store_dword [[VVAL]]
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
define void @get_work_dim (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.AMDGPU.read.workdim() #0
|
||||
@ -147,8 +157,8 @@ entry:
|
||||
; kernel arguments, but this may change in the future.
|
||||
|
||||
; FUNC-LABEL: {{^}}tgid_x:
|
||||
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
|
||||
; SI: buffer_store_dword [[VVAL]]
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
define void @tgid_x (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.tgid.x() #0
|
||||
@ -157,8 +167,8 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}tgid_y:
|
||||
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
|
||||
; SI: buffer_store_dword [[VVAL]]
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
define void @tgid_y (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.tgid.y() #0
|
||||
@ -167,8 +177,8 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}tgid_z:
|
||||
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
|
||||
; SI: buffer_store_dword [[VVAL]]
|
||||
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
define void @tgid_z (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.tgid.z() #0
|
||||
@ -177,7 +187,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}tidig_x:
|
||||
; SI: buffer_store_dword v0
|
||||
; GCN: buffer_store_dword v0
|
||||
define void @tidig_x (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.tidig.x() #0
|
||||
@ -186,7 +196,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}tidig_y:
|
||||
; SI: buffer_store_dword v1
|
||||
; GCN: buffer_store_dword v1
|
||||
define void @tidig_y (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.tidig.y() #0
|
||||
@ -195,7 +205,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}tidig_z:
|
||||
; SI: buffer_store_dword v2
|
||||
; GCN: buffer_store_dword v2
|
||||
define void @tidig_z (i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.tidig.z() #0
|
||||
|
Loading…
Reference in New Issue
Block a user