R600/SI: Enable a lot of existing tests for VI (squashed commits)

This is a union of these commits:

* R600/SI: Enable more tests for VI which need no changes

* R600/SI: Enable V_BCNT tests for VI
    Differences:
    - v_bcnt_..._e32 -> _e64 (see the sketch after this list)
    - the s_load_dword* inline offset is in bytes instead of dwords

* R600/SI: Enable all tests for VI which use S_LOAD_DWORD
    The inline offset is changed from dwords to bytes (see the sketch after this list).

* R600/SI: Enable LDS tests for VI
    Differences:
    - the s_load_dword inline offset changed from dwords to bytes
    - the tests checked very little on CI, so they have been fixed to check all
      instructions that the "SI" prefix checked

* R600/SI: Enable lshr tests for VI

* R600/SI: Fix divrem64 tests
    - "v_lshl_64" was missing "b" before "64"
    - added VI-NOT checks

* R600/SI: Enable the SI.tid test for VI

* R600/SI: Enable the frem test for VI
    Also, frem_f64 checking is added for CI and VI.

* R600/SI: Add VI tests for rsq.clamped

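For reference, the s_load_dword offset change looks like this in the updated checks: SI/CI encode the inline offset in dword units, while VI encodes it in bytes, so every SI offset is multiplied by 4 for the VI prefix (0x9 -> 0x24, 0xb -> 0x2c, 0xd -> 0x34 in the hunks below). A minimal sketch in the style of the kernel-argument tests (the register digits are FileCheck placeholders, not fixed registers):

    ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb   ; offset 0xb dwords
    ; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c  ; same argument, 0xb * 4 = 0x2c bytes
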
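The v_bcnt difference is only an encoding change: _e32 is the 32-bit VOP2 form and _e64 the 64-bit VOP3 form, and the VI run lines expect the VOP3 form, presumably because the VOP2 encoding of v_bcnt_u32_b32 is not available there. A sketch of the resulting prefix split, taken from the ctpop checks below (the [[...]] names are FileCheck captures defined in that test):

    ; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
    ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
    ; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
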
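For the divrem64 fix, the corrected negative check uses the real SI mnemonic v_lshl_b64. The divrem64 hunks are not part of the excerpt below, so the following pair of -NOT lines is only a hypothetical sketch of what the added VI-NOT checks look like; the exact VI mnemonic guarded in the real test may differ:

    ; SI-NOT: v_lshl_b64
    ; VI-NOT: v_lshlrev_b64
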
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228830 91177308-0d34-0410-b5e6-96231b3b80d8
Marek Olsak 2015-02-11 14:26:46 +00:00
parent f033db57e9
commit c0021e43ea
38 changed files with 1165 additions and 994 deletions

View File

@ -1,9 +1,12 @@
; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI
; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=SI
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI
; SI: {{^}}f64_kernel_arg:
; GCN: {{^}}f64_kernel_arg:
; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
; SI: buffer_store_dwordx2
; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x24
; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c
; GCN: buffer_store_dwordx2
define void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
entry:
store double %in, double addrspace(1)* %out

View File

@ -1,14 +1,17 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SICI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SICI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
; SI: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
; SI: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
; SI: s_endpgm
; GCN: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
; GCN: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
; GCN: s_endpgm
define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
@ -18,16 +21,18 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset:
; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
; SI: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
; SI: buffer_store_dwordx2 [[RESULT]],
; SI: s_endpgm
; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SICI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
; GCN: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
@ -38,8 +43,8 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset
; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
; SI: s_endpgm
; CIVI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
; GCN: s_endpgm
define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
@ -51,13 +56,15 @@ define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i3
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset:
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
; SI: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
; SI: s_endpgm
; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x28
; GCN-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
; GCN: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
; GCN: s_endpgm
define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
@ -66,15 +73,17 @@ define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %sw
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset:
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
; SI: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
; SI: s_endpgm
; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; SICI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
; VI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
; GCN: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
; GCN: s_endpgm
define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic

View File

@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
@ -8,11 +9,11 @@ declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
; FUNC-LABEL: {{^}}s_ctpop_i32:
; SI: s_load_dword [[SVAL:s[0-9]+]],
; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
; GCN: s_load_dword [[SVAL:s[0-9]+]],
; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; GCN: buffer_store_dword [[VRESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@ -23,10 +24,10 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
; XXX - Why 0 in register?
; FUNC-LABEL: {{^}}v_ctpop_i32:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@ -37,12 +38,13 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
}
; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
; SI: buffer_load_dword [[VAL1:v[0-9]+]],
; SI: buffer_load_dword [[VAL0:v[0-9]+]],
; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
; GCN: buffer_load_dword [[VAL1:v[0-9]+]],
; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@ -57,11 +59,11 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
}
; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
; SI: buffer_load_dword [[VAL0:v[0-9]+]],
; SI-NEXT: s_waitcnt
; SI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
; SI-NEXT: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
; GCN-NEXT: s_waitcnt
; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
; GCN-NEXT: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
%val0 = load i32 addrspace(1)* %in0, align 4
%ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
@ -71,9 +73,9 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(
}
; FUNC-LABEL: {{^}}v_ctpop_v2i32:
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@ -85,11 +87,11 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v4i32:
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@ -103,15 +105,15 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v8i32:
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@ -129,23 +131,23 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v16i32:
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: v_bcnt_u32_b32_e64
; SI: s_endpgm
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@ -171,10 +173,10 @@ define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> ad
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@ -186,10 +188,10 @@ define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@ -201,11 +203,12 @@ define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out,
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@ -215,11 +218,11 @@ define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspa
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
@ -231,11 +234,11 @@ define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
@ -247,11 +250,12 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
; SI-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
@ -269,10 +273,11 @@ define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrsp
; FUNC-LABEL: {{^}}ctpop_i32_in_br:
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[RESULT]], [[SRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34
; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; GCN: v_mov_b32_e32 [[RESULT]], [[SRESULT]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
entry:

View File

@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
@ -8,10 +9,11 @@ declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone
; FUNC-LABEL: {{^}}s_ctpop_i64:
; SI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
; VI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]]
; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; GCN: buffer_store_dword [[VRESULT]],
; GCN: s_endpgm
define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
%truncctpop = trunc i64 %ctpop to i32
@ -20,11 +22,12 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
}
; FUNC-LABEL: {{^}}v_ctpop_i64:
; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%val = load i64 addrspace(1)* %in, align 8
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
@ -34,9 +37,9 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali
}
; FUNC-LABEL: {{^}}s_ctpop_v2i64:
; SI: s_bcnt1_i32_b64
; SI: s_bcnt1_i32_b64
; SI: s_endpgm
; GCN: s_bcnt1_i32_b64
; GCN: s_bcnt1_i32_b64
; GCN: s_endpgm
define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind {
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
%truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
@ -45,11 +48,11 @@ define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val)
}
; FUNC-LABEL: {{^}}s_ctpop_v4i64:
; SI: s_bcnt1_i32_b64
; SI: s_bcnt1_i32_b64
; SI: s_bcnt1_i32_b64
; SI: s_bcnt1_i32_b64
; SI: s_endpgm
; GCN: s_bcnt1_i32_b64
; GCN: s_bcnt1_i32_b64
; GCN: s_bcnt1_i32_b64
; GCN: s_bcnt1_i32_b64
; GCN: s_endpgm
define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind {
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
%truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
@ -58,11 +61,11 @@ define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val)
}
; FUNC-LABEL: {{^}}v_ctpop_v2i64:
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: s_endpgm
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: s_endpgm
define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
%val = load <2 x i64> addrspace(1)* %in, align 16
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
@ -72,15 +75,15 @@ define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v4i64:
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: s_endpgm
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: s_endpgm
define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
%val = load <4 x i64> addrspace(1)* %in, align 32
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
@ -94,11 +97,12 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs
; FUNC-LABEL: {{^}}ctpop_i64_in_br:
; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
; SI: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
; SI: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
; SI: s_endpgm
; VI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x34
; GCN: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
; GCN: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
; GCN: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
; GCN: s_endpgm
define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
entry:
%tmp0 = icmp eq i32 %cond, 0

View File

@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
@ -10,7 +11,7 @@
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|
; SI: v_and_b32
; GCN: v_and_b32
define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
%bc= bitcast i32 %in to float
@ -23,7 +24,7 @@ define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|
; SI: v_and_b32
; GCN: v_and_b32
define void @fabs_free(float addrspace(1)* %out, i32 %in) {
%bc= bitcast i32 %in to float
@ -35,7 +36,7 @@ define void @fabs_free(float addrspace(1)* %out, i32 %in) {
; FUNC-LABEL: {{^}}fabs_f32:
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; SI: v_and_b32
; GCN: v_and_b32
define void @fabs_f32(float addrspace(1)* %out, float %in) {
%fabs = call float @llvm.fabs.f32(float %in)
store float %fabs, float addrspace(1)* %out
@ -46,8 +47,8 @@ define void @fabs_f32(float addrspace(1)* %out, float %in) {
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; SI: v_and_b32
; SI: v_and_b32
; GCN: v_and_b32
; GCN: v_and_b32
define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
store <2 x float> %fabs, <2 x float> addrspace(1)* %out
@ -60,20 +61,21 @@ define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; SI: v_and_b32
; SI: v_and_b32
; SI: v_and_b32
; SI: v_and_b32
; GCN: v_and_b32
; GCN: v_and_b32
; GCN: v_and_b32
; GCN: v_and_b32
define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
store <4 x float> %fabs, <4 x float> addrspace(1)* %out
ret void
}
; SI-LABEL: {{^}}fabs_fn_fold:
; GCN-LABEL: {{^}}fabs_fn_fold:
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: and
; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: and
; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
%fabs = call float @fabs(float %in0)
%fmul = fmul float %fabs, %in1
@ -81,10 +83,11 @@ define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
ret void
}
; SI-LABEL: {{^}}fabs_fold:
; GCN-LABEL: {{^}}fabs_fold:
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: and
; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: and
; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
%fabs = call float @llvm.fabs.f32(float %in0)
%fmul = fmul float %fabs, %in1

View File

@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
declare double @llvm.ceil.f64(double) nounwind readnone
declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone

View File

@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
@ -10,12 +11,14 @@ declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind read
; FUNC-LABEL: {{^}}test_copysign_f32:
; SI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0xb
; SI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0xc
; SI-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
; SI-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; VI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0x2c
; VI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0x30
; GCN-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
; GCN-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BFI_INT
define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind {
@ -25,7 +28,7 @@ define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign
}
; FUNC-LABEL: {{^}}test_copysign_v2f32:
; SI: s_endpgm
; GCN: s_endpgm
; EG: BFI_INT
; EG: BFI_INT
@ -36,7 +39,7 @@ define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %ma
}
; FUNC-LABEL: {{^}}test_copysign_v4f32:
; SI: s_endpgm
; GCN: s_endpgm
; EG: BFI_INT
; EG: BFI_INT

View File

@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
declare double @llvm.copysign.f64(double, double) nounwind readnone
declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone
@ -7,13 +8,15 @@ declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind r
; FUNC-LABEL: {{^}}test_copysign_f64:
; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
; SI-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
; SI: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
; SI: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
; SI: s_endpgm
; VI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
; GCN-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
; GCN: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
; GCN: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
; GCN: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
; GCN: s_endpgm
define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind {
%result = call double @llvm.copysign.f64(double %mag, double %sign)
store double %result, double addrspace(1)* %out, align 8
@ -21,7 +24,7 @@ define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %s
}
; FUNC-LABEL: {{^}}test_copysign_v2f64:
; SI: s_endpgm
; GCN: s_endpgm
define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind {
%result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8
@ -29,7 +32,7 @@ define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %
}
; FUNC-LABEL: {{^}}test_copysign_v4f64:
; SI: s_endpgm
; GCN: s_endpgm
define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind {
%result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8

View File

@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
declare double @llvm.floor.f64(double) nounwind readnone
declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone

View File

@ -1,7 +1,8 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_f64:
; SI: v_xor_b32
; GCN: v_xor_b32
define void @fneg_f64(double addrspace(1)* %out, double %in) {
%fneg = fsub double -0.000000e+00, %in
store double %fneg, double addrspace(1)* %out
@ -9,8 +10,8 @@ define void @fneg_f64(double addrspace(1)* %out, double %in) {
}
; FUNC-LABEL: {{^}}fneg_v2f64:
; SI: v_xor_b32
; SI: v_xor_b32
; GCN: v_xor_b32
; GCN: v_xor_b32
define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
%fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
store <2 x double> %fneg, <2 x double> addrspace(1)* %out
@ -23,10 +24,10 @@ define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double>
; R600: -PV
; R600: -PV
; SI: v_xor_b32
; SI: v_xor_b32
; SI: v_xor_b32
; SI: v_xor_b32
; GCN: v_xor_b32
; GCN: v_xor_b32
; GCN: v_xor_b32
; GCN: v_xor_b32
define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
%fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
store <4 x double> %fneg, <4 x double> addrspace(1)* %out
@ -39,7 +40,7 @@ define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double>
; FUNC-LABEL: {{^}}fneg_free_f64:
; FIXME: Unnecessary copy to VGPRs
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
%bc = bitcast i64 %in to double
%fsub = fsub double 0.0, %bc
@ -47,10 +48,11 @@ define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
ret void
}
; SI-LABEL: {{^}}fneg_fold_f64:
; GCN-LABEL: {{^}}fneg_fold_f64:
; SI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: xor
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
; VI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-NOT: xor
; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
%fsub = fsub double -0.0, %in
%fmul = fmul double %fsub, %in

View File

@ -1,10 +1,11 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_f32:
; R600: -PV
; SI: v_xor_b32
; GCN: v_xor_b32
define void @fneg_f32(float addrspace(1)* %out, float %in) {
%fneg = fsub float -0.000000e+00, %in
store float %fneg, float addrspace(1)* %out
@ -15,8 +16,8 @@ define void @fneg_f32(float addrspace(1)* %out, float %in) {
; R600: -PV
; R600: -PV
; SI: v_xor_b32
; SI: v_xor_b32
; GCN: v_xor_b32
; GCN: v_xor_b32
define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
%fneg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
store <2 x float> %fneg, <2 x float> addrspace(1)* %out
@ -29,10 +30,10 @@ define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %i
; R600: -PV
; R600: -PV
; SI: v_xor_b32
; SI: v_xor_b32
; SI: v_xor_b32
; SI: v_xor_b32
; GCN: v_xor_b32
; GCN: v_xor_b32
; GCN: v_xor_b32
; GCN: v_xor_b32
define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
%fneg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
store <4 x float> %fneg, <4 x float> addrspace(1)* %out
@ -48,7 +49,7 @@ define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %i
; R600: -KC0[2].Z
; XXX: We could use v_add_f32_e64 with the negate bit here instead.
; SI: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
; GCN: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
%bc = bitcast i32 %in to float
%fsub = fsub float 0.0, %bc
@ -58,8 +59,9 @@ define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
; FUNC-LABEL: {{^}}fneg_fold_f32:
; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: xor
; SI: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: xor
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
%fsub = fsub float -0.0, %in
%fmul = fmul float %fsub, %in

View File

@ -1,16 +1,18 @@
; RUN: llc -march=amdgcn -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}frem_f32:
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
; SI-DAG: v_cmp
; SI-DAG: v_mul_f32
; SI: v_rcp_f32_e32
; SI: v_mul_f32_e32
; SI: v_mul_f32_e32
; SI: v_trunc_f32_e32
; SI: v_mad_f32
; SI: s_endpgm
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
; GCN-DAG: v_cmp
; GCN-DAG: v_mul_f32
; GCN: v_rcp_f32_e32
; GCN: v_mul_f32_e32
; GCN: v_mul_f32_e32
; GCN: v_trunc_f32_e32
; GCN: v_mad_f32
; GCN: s_endpgm
define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2) #0 {
%gep2 = getelementptr float addrspace(1)* %in2, i32 4
@ -22,14 +24,14 @@ define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
}
; FUNC-LABEL: {{^}}unsafe_frem_f32:
; SI: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
; SI: buffer_load_dword [[X:v[0-9]+]], {{.*}}
; SI: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
; SI: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
; GCN: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
; GCN: buffer_load_dword [[X:v[0-9]+]], {{.*}}
; GCN: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
; GCN: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
; GCN: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
; GCN: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
; GCN: buffer_store_dword [[RESULT]]
; GCN: s_endpgm
define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2) #1 {
%gep2 = getelementptr float addrspace(1)* %in2, i32 4
@ -40,11 +42,19 @@ define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
ret void
}
; TODO: This should check something when f64 fdiv is implemented
; correctly
; FUNC-LABEL: {{^}}frem_f64:
; SI: s_endpgm
; GCN: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0
; GCN: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0
; TODO: Check SI.
; CI: v_rcp_f64_e32 [[INVY:v\[[0-9]+:[0-9]+\]]], [[Y]]
; CI: v_mul_f64 [[DIV:v\[[0-9]+:[0-9]+\]]], [[X]], [[INVY]]
; CI: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[DIV]]
; CI: v_mul_f64 [[RESULTM:v\[[0-9]+:[0-9]+\]]], [[TRUNC]], [[Y]]
; SI: v_mul_f64 [[RESULTM:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, [[Y]]
; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], -[[RESULTM]]
; GCN: buffer_store_dwordx2 [[RESULT]], {{.*}}, 0
; GCN: s_endpgm
define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) #0 {
%r0 = load double addrspace(1)* %in1, align 8
@ -55,11 +65,12 @@ define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
}
; FUNC-LABEL: {{^}}unsafe_frem_f64:
; SI: v_rcp_f64_e32
; SI: v_mul_f64
; GCN: v_rcp_f64_e32
; GCN: v_mul_f64
; SI: v_bfe_u32
; SI: v_fma_f64
; SI: s_endpgm
; CI: v_trunc_f64_e32
; GCN: v_fma_f64
; GCN: s_endpgm
define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) #1 {
%r0 = load double addrspace(1)* %in1, align 8

View File

@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
declare double @llvm.trunc.f64(double) nounwind readnone
declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone

View File

@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: {{^}}use_gep_address_space:

View File

@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
@ -9,6 +10,7 @@
; FUNC-LABEL: {{^}}float:
; FIXME: We should be using s_load_dword here.
; SI: buffer_load_dword
; VI: s_load_dword
; EG-DAG: MOV {{\** *}}T2.X
; EG-DAG: MOV {{\** *}}T3.X
@ -31,6 +33,7 @@ entry:
; FIXME: We should be using s_load_dword here.
; SI: buffer_load_dword
; VI: s_load_dword
; EG-DAG: MOV {{\** *}}T2.X
; EG-DAG: MOV {{\** *}}T3.X
@ -53,7 +56,7 @@ entry:
@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
; FUNC-LABEL: {{^}}struct_foo_gv_load:
; SI: s_load_dword
; GCN: s_load_dword
define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
@ -70,6 +73,7 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
; FUNC-LABEL: {{^}}array_v1_gv_load:
; FIXME: We should be using s_load_dword here.
; SI: buffer_load_dword
; VI: s_load_dword
define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
%load = load <1 x i32> addrspace(2)* %gep, align 4

View File

@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=CHECK %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CHECK %s
; Use a 64-bit value with lo bits that can be represented as an inline constant
; CHECK-LABEL: {{^}}i64_imm_inline_lo:
@ -303,7 +304,8 @@ define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
@ -313,7 +315,8 @@ define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.5, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) {
@ -323,7 +326,8 @@ define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -0.5, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) {
@ -333,7 +337,8 @@ define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) {
@ -343,7 +348,8 @@ define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1.0, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) {
@ -353,7 +359,8 @@ define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2.0, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) {
@ -363,7 +370,8 @@ define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2.0, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) {
@ -373,7 +381,8 @@ define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 4.0, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) {
@ -383,7 +392,8 @@ define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -4.0, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
@ -394,7 +404,8 @@ define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
; CHECK-LABEL: {{^}}add_inline_imm_1_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) {
@ -404,7 +415,8 @@ define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_2_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) {
@ -414,7 +426,8 @@ define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_16_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 16, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) {
@ -424,7 +437,8 @@ define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
@ -434,7 +448,8 @@ define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
@ -444,7 +459,8 @@ define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -16, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
@ -454,7 +470,8 @@ define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_63_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) {
@ -464,7 +481,8 @@ define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_64_f64
; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 64, [[VAL]]
; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {

View File

@ -1,11 +1,11 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; EG-LABEL: {{^}}i8_arg:
; FUNC-LABEL: {{^}}i8_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}i8_arg:
; SI: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
@ -14,10 +14,10 @@ entry:
ret void
}
; EG-LABEL: {{^}}i8_zext_arg:
; FUNC-LABEL: {{^}}i8_zext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}i8_zext_arg:
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
@ -26,10 +26,10 @@ entry:
ret void
}
; EG-LABEL: {{^}}i8_sext_arg:
; FUNC-LABEL: {{^}}i8_sext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}i8_sext_arg:
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
@ -38,10 +38,9 @@ entry:
ret void
}
; EG-LABEL: {{^}}i16_arg:
; FUNC-LABEL: {{^}}i16_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}i16_arg:
; SI: buffer_load_ushort
; GCN: buffer_load_ushort
define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
@ -50,10 +49,10 @@ entry:
ret void
}
; EG-LABEL: {{^}}i16_zext_arg:
; FUNC-LABEL: {{^}}i16_zext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}i16_zext_arg:
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
@ -62,10 +61,10 @@ entry:
ret void
}
; EG-LABEL: {{^}}i16_sext_arg:
; FUNC-LABEL: {{^}}i16_sext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}i16_sext_arg:
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
@ -74,176 +73,170 @@ entry:
ret void
}
; EG-LABEL: {{^}}i32_arg:
; FUNC-LABEL: {{^}}i32_arg:
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}i32_arg:
; s_load_dword s{{[0-9]}}, s[0:1], 0xb
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
store i32 %in, i32 addrspace(1)* %out, align 4
ret void
}
; EG-LABEL: {{^}}f32_arg:
; FUNC-LABEL: {{^}}f32_arg:
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI-LABEL: {{^}}f32_arg:
; s_load_dword s{{[0-9]}}, s[0:1], 0xb
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
store float %in, float addrspace(1)* %out, align 4
ret void
}
; EG-LABEL: {{^}}v2i8_arg:
; FUNC-LABEL: {{^}}v2i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; SI-LABEL: {{^}}v2i8_arg:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
store <2 x i8> %in, <2 x i8> addrspace(1)* %out
ret void
}
; EG-LABEL: {{^}}v2i16_arg:
; FUNC-LABEL: {{^}}v2i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; SI-LABEL: {{^}}v2i16_arg:
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; GCN-DAG: buffer_load_ushort
; GCN-DAG: buffer_load_ushort
define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
store <2 x i16> %in, <2 x i16> addrspace(1)* %out
ret void
}
; EG-LABEL: {{^}}v2i32_arg:
; FUNC-LABEL: {{^}}v2i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI-LABEL: {{^}}v2i32_arg:
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
ret void
}
; EG-LABEL: {{^}}v2f32_arg:
; FUNC-LABEL: {{^}}v2f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI-LABEL: {{^}}v2f32_arg:
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
ret void
}
; EG-LABEL: {{^}}v3i8_arg:
; FUNC-LABEL: {{^}}v3i8_arg:
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
; SI-LABEL: {{^}}v3i8_arg:
define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
entry:
store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
ret void
}
; EG-LABEL: {{^}}v3i16_arg:
; FUNC-LABEL: {{^}}v3i16_arg:
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
; SI-LABEL: {{^}}v3i16_arg:
define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
entry:
store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
ret void
}
; EG-LABEL: {{^}}v3i32_arg:
; FUNC-LABEL: {{^}}v3i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI-LABEL: {{^}}v3i32_arg:
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
ret void
}
; EG-LABEL: {{^}}v3f32_arg:
; FUNC-LABEL: {{^}}v3f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI-LABEL: {{^}}v3f32_arg:
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
ret void
}
; EG-LABEL: {{^}}v4i8_arg:
; FUNC-LABEL: {{^}}v4i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; SI-LABEL: {{^}}v4i8_arg:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
store <4 x i8> %in, <4 x i8> addrspace(1)* %out
ret void
}
; EG-LABEL: {{^}}v4i16_arg:
; FUNC-LABEL: {{^}}v4i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; SI-LABEL: {{^}}v4i16_arg:
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
store <4 x i16> %in, <4 x i16> addrspace(1)* %out
ret void
}
; EG-LABEL: {{^}}v4i32_arg:
; FUNC-LABEL: {{^}}v4i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI-LABEL: {{^}}v4i32_arg:
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
ret void
}
; EG-LABEL: {{^}}v4f32_arg:
; FUNC-LABEL: {{^}}v4f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI-LABEL: {{^}}v4f32_arg:
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
ret void
}
; EG-LABEL: {{^}}v8i8_arg:
; FUNC-LABEL: {{^}}v8i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
@ -252,21 +245,20 @@ entry:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; SI-LABEL: {{^}}v8i8_arg:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
store <8 x i8> %in, <8 x i8> addrspace(1)* %out
ret void
}
; EG-LABEL: {{^}}v8i16_arg:
; FUNC-LABEL: {{^}}v8i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
@ -275,22 +267,21 @@ entry:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; SI-LABEL: {{^}}v8i16_arg:
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
store <8 x i16> %in, <8 x i16> addrspace(1)* %out
ret void
}
; EG-LABEL: {{^}}v8i32_arg:
; FUNC-LABEL: {{^}}v8i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
@ -299,15 +290,15 @@ entry:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI-LABEL: {{^}}v8i32_arg:
; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
ret void
}
; EG-LABEL: {{^}}v8f32_arg:
; FUNC-LABEL: {{^}}v8f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
@ -316,7 +307,6 @@ entry:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI-LABEL: {{^}}v8f32_arg:
; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
@ -324,7 +314,7 @@ entry:
ret void
}
; EG-LABEL: {{^}}v16i8_arg:
; FUNC-LABEL: {{^}}v16i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
@ -341,30 +331,29 @@ entry:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; SI-LABEL: {{^}}v16i8_arg:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
entry:
store <16 x i8> %in, <16 x i8> addrspace(1)* %out
ret void
}
; EG-LABEL: {{^}}v16i16_arg:
; FUNC-LABEL: {{^}}v16i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
@ -381,30 +370,29 @@ entry:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; SI-LABEL: {{^}}v16i16_arg:
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
store <16 x i16> %in, <16 x i16> addrspace(1)* %out
ret void
}
; EG-LABEL: {{^}}v16i32_arg:
; FUNC-LABEL: {{^}}v16i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
@ -421,15 +409,15 @@ entry:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI-LABEL: {{^}}v16i32_arg:
; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
ret void
}
; EG-LABEL: {{^}}v16f32_arg:
; FUNC-LABEL: {{^}}v16f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
@ -446,8 +434,8 @@ entry:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI-LABEL: {{^}}v16f32_arg:
; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
@ -455,18 +443,18 @@ entry:
}
; FUNC-LABEL: {{^}}kernel_arg_i64:
; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI: buffer_store_dwordx2
; GCN: s_load_dwordx2
; GCN: s_load_dwordx2
; GCN: buffer_store_dwordx2
define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
store i64 %a, i64 addrspace(1)* %out, align 8
ret void
}
; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
; XSI: s_load_dwordx2
; XSI: s_load_dwordx2
; XSI: buffer_store_dwordx2
; XGCN: s_load_dwordx2
; XGCN: s_load_dwordx2
; XGCN: buffer_store_dwordx2
; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
; ret void


@ -1,25 +1,29 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone
declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone
; SI-LABEL: {{^}}test_div_fixup_f32:
; GCN-LABEL: {{^}}test_div_fixup_f32:
; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
; SI: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34
; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
; GCN: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
%result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
; SI-LABEL: {{^}}test_div_fixup_f64:
; SI: v_div_fixup_f64
; GCN-LABEL: {{^}}test_div_fixup_f64:
; GCN: v_div_fixup_f64
define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
%result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
store double %result, double addrspace(1)* %out, align 8


@ -1,25 +1,29 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone
declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone
; SI-LABEL: {{^}}test_div_fmas_f32:
; GCN-LABEL: {{^}}test_div_fmas_f32:
; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34
; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
%result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
; SI-LABEL: {{^}}test_div_fmas_f64:
; SI: v_div_fmas_f64
; GCN-LABEL: {{^}}test_div_fmas_f64:
; GCN: v_div_fmas_f64
define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind {
%result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
store double %result, double addrspace(1)* %out, align 8


@ -1,9 +1,21 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
; FUNC-LABEL: {{^}}rsq_clamped_f64:
; SI: v_rsq_clamp_f64_e32
; VI: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[2:3]
; TODO: this constant should be folded:
; VI: s_mov_b32 s[[ALLBITS:[0-9]+]], -1
; VI: s_mov_b32 s[[HIGH1:[0-9]+]], 0x7fefffff
; VI: s_mov_b32 s[[LOW1:[0-9]+]], s[[ALLBITS]]
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
; VI: s_mov_b32 s[[HIGH2:[0-9]+]], 0xffefffff
; VI: s_mov_b32 s[[LOW2:[0-9]+]], s[[ALLBITS]]
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
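; Note: VI has no v_rsq_clamp_f64, so the clamp is checked as v_min_f64/v_max_f64
; against +/-DBL_MAX: 0x7fefffff and 0xffefffff are the high dwords of +DBL_MAX and
; -DBL_MAX, and the all-ones low dword comes from s[[ALLBITS]].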
define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
%rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone
store double %rsq_clamped, double addrspace(1)* %out, align 8


@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
@ -6,7 +7,15 @@ declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone
; FUNC-LABEL: {{^}}rsq_clamped_f32:
; SI: v_rsq_clamp_f32_e32
; VI: v_rsq_f32_e32 [[RSQ:v[0-9]+]], {{s[0-9]+}}
; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
; TODO: this constant should be folded:
; VI: v_mov_b32_e32 [[MINFLT:v[0-9]+]], 0xff7fffff
; VI: v_max_f32_e32 {{v[0-9]+}}, [[MIN]], [[MINFLT]]
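; Note: as with the f64 case, VI has no v_rsq_clamp_f32, so the clamp is a
; v_min_f32/v_max_f32 against +/-FLT_MAX (0x7f7fffff / 0xff7fffff).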
; EG: RECIPSQRT_CLAMPED
define void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind {
%rsq_clamped = call float @llvm.AMDGPU.rsq.clamped.f32(float %src) nounwind readnone
store float %rsq_clamped, float addrspace(1)* %out, align 4


@ -1,7 +1,9 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s
;CHECK: v_mbcnt_lo_u32_b32_e64
;CHECK: v_mbcnt_hi_u32_b32_e32
;GCN: v_mbcnt_lo_u32_b32_e64
;SI: v_mbcnt_hi_u32_b32_e32
;VI: v_mbcnt_hi_u32_b32_e64
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
main_body:


@ -1,3 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s


@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}round_f32:


@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
; BOTH-LABEL: {{^}}local_i32_load
; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 [M0]


@ -1,15 +1,16 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
; EG: LDS_WRXCHG_RET *
; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; SI: s_load_dword [[SPTR:s[0-9]+]],
; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN: s_load_dword [[SPTR:s[0-9]+]],
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@ -18,8 +19,8 @@ define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
; EG: LDS_WRXCHG_RET *
; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
@ -30,12 +31,12 @@ define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
; XXX - Is it really necessary to load 4 into VGPR?
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
; EG: LDS_ADD_RET *
; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; SI: s_load_dword [[SPTR:s[0-9]+]],
; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; SI: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN: s_load_dword [[SPTR:s[0-9]+]],
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@ -44,8 +45,8 @@ define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
; EG: LDS_ADD_RET *
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
@ -56,8 +57,8 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
; EG: LDS_ADD_RET *
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
; CI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
@ -69,9 +70,9 @@ define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 ad
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32:
; EG: LDS_ADD_RET *
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
; SI: s_endpgm
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
; GCN: s_endpgm
define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@ -80,9 +81,9 @@ define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
; EG: LDS_ADD_RET *
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
; SI: s_endpgm
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
; GCN: s_endpgm
define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
@ -93,8 +94,8 @@ define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_bad_si_offset:
; EG: LDS_ADD_RET *
; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
; CI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; CIVI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
@ -106,8 +107,8 @@ define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 ad
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
; EG: LDS_SUB_RET *
; SI: ds_sub_rtn_u32
; SI: s_endpgm
; GCN: ds_sub_rtn_u32
; GCN: s_endpgm
define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@ -116,8 +117,8 @@ define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
; EG: LDS_SUB_RET *
; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
@ -127,9 +128,9 @@ define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32:
; EG: LDS_SUB_RET *
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
; SI: s_endpgm
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
; GCN: s_endpgm
define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@ -138,9 +139,9 @@ define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
; EG: LDS_SUB_RET *
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
; SI: s_endpgm
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
; GCN: s_endpgm
define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
@ -150,8 +151,8 @@ define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
; EG: LDS_AND_RET *
; SI: ds_and_rtn_b32
; SI: s_endpgm
; GCN: ds_and_rtn_b32
; GCN: s_endpgm
define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@ -160,8 +161,8 @@ define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
; EG: LDS_AND_RET *
; SI: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
@ -171,8 +172,8 @@ define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
; EG: LDS_OR_RET *
; SI: ds_or_rtn_b32
; SI: s_endpgm
; GCN: ds_or_rtn_b32
; GCN: s_endpgm
define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@ -181,8 +182,8 @@ define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %pt
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
; EG: LDS_OR_RET *
; SI: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
@ -192,8 +193,8 @@ define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
; EG: LDS_XOR_RET *
; SI: ds_xor_rtn_b32
; SI: s_endpgm
; GCN: ds_xor_rtn_b32
; GCN: s_endpgm
define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@ -202,8 +203,8 @@ define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
; EG: LDS_XOR_RET *
; SI: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
@ -221,8 +222,8 @@ define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
; EG: LDS_MIN_INT_RET *
; SI: ds_min_rtn_i32
; SI: s_endpgm
; GCN: ds_min_rtn_i32
; GCN: s_endpgm
define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@ -231,8 +232,8 @@ define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
; EG: LDS_MIN_INT_RET *
; SI: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
@ -242,8 +243,8 @@ define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
; EG: LDS_MAX_INT_RET *
; SI: ds_max_rtn_i32
; SI: s_endpgm
; GCN: ds_max_rtn_i32
; GCN: s_endpgm
define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@ -252,8 +253,8 @@ define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
; EG: LDS_MAX_INT_RET *
; SI: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
@ -263,8 +264,8 @@ define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
; EG: LDS_MIN_UINT_RET *
; SI: ds_min_rtn_u32
; SI: s_endpgm
; GCN: ds_min_rtn_u32
; GCN: s_endpgm
define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@ -273,8 +274,8 @@ define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
; EG: LDS_MIN_UINT_RET *
; SI: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
@ -284,8 +285,8 @@ define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
; EG: LDS_MAX_UINT_RET *
; SI: ds_max_rtn_u32
; SI: s_endpgm
; GCN: ds_max_rtn_u32
; GCN: s_endpgm
define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@ -294,8 +295,8 @@ define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
; EG: LDS_MAX_UINT_RET *
; SI: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
@ -304,19 +305,19 @@ define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
; SI: s_load_dword [[SPTR:s[0-9]+]],
; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
; SI: s_endpgm
; GCN: s_load_dword [[SPTR:s[0-9]+]],
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
; GCN: s_endpgm
define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
@ -325,19 +326,19 @@ define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
; XXX - Is it really necessary to load 4 into VGPR?
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
; SI: s_load_dword [[SPTR:s[0-9]+]],
; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; SI: ds_add_u32 [[VPTR]], [[DATA]] [M0]
; SI: s_endpgm
; GCN: s_load_dword [[SPTR:s[0-9]+]],
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_add_u32 [[VPTR]], [[DATA]] [M0]
; GCN: s_endpgm
define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
@ -346,8 +347,8 @@ define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset
; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} [M0]
; CI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
; SI: s_endpgm
; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
; GCN: s_endpgm
define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
@ -357,18 +358,18 @@ define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32:
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] [M0]
; SI: s_endpgm
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] [M0]
; GCN: s_endpgm
define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
; SI: s_endpgm
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
; GCN: s_endpgm
define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
@ -377,8 +378,8 @@ define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_bad_si_offset:
; SI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}}
; CI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; CIVI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
@ -388,16 +389,16 @@ define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32
}
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
; SI: ds_sub_u32
; SI: s_endpgm
; GCN: ds_sub_u32
; GCN: s_endpgm
define void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
@ -405,18 +406,18 @@ define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32:
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]]
; SI: s_endpgm
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]]
; GCN: s_endpgm
define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
; SI: s_endpgm
; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
; GCN: s_endpgm
define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
@ -424,16 +425,16 @@ define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
; SI: ds_and_b32
; SI: s_endpgm
; GCN: ds_and_b32
; GCN: s_endpgm
define void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
; SI: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
@ -441,16 +442,16 @@ define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
; SI: ds_or_b32
; SI: s_endpgm
; GCN: ds_or_b32
; GCN: s_endpgm
define void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
; SI: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
@ -458,16 +459,16 @@ define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
; SI: ds_xor_b32
; SI: s_endpgm
; GCN: ds_xor_b32
; GCN: s_endpgm
define void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
; SI: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
@ -482,16 +483,16 @@ define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
; }
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
; SI: ds_min_i32
; SI: s_endpgm
; GCN: ds_min_i32
; GCN: s_endpgm
define void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
; SI: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
@ -499,16 +500,16 @@ define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
; SI: ds_max_i32
; SI: s_endpgm
; GCN: ds_max_i32
; GCN: s_endpgm
define void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
; SI: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
@ -516,16 +517,16 @@ define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
; SI: ds_min_u32
; SI: s_endpgm
; GCN: ds_min_u32
; GCN: s_endpgm
define void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
; SI: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
@ -533,16 +534,16 @@ define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
; SI: ds_max_u32
; SI: s_endpgm
; GCN: ds_max_u32
; GCN: s_endpgm
define void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
; SI: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: s_endpgm
; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst


@ -1,8 +1,9 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64:
; SI: ds_wrxchg_rtn_b64
; SI: s_endpgm
; GCN: ds_wrxchg_rtn_b64
; GCN: s_endpgm
define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@ -10,8 +11,8 @@ define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
@ -20,8 +21,8 @@ define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac
}
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64:
; SI: ds_add_rtn_u64
; SI: s_endpgm
; GCN: ds_add_rtn_u64
; GCN: s_endpgm
define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@ -29,13 +30,14 @@ define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
; SI: buffer_store_dwordx2 [[RESULT]],
; SI: s_endpgm
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i64 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
@ -44,11 +46,11 @@ define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
}
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64:
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
; SI: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]],
; SI: s_endpgm
; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@ -56,8 +58,8 @@ define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
; SI: ds_inc_rtn_u64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_inc_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
@ -66,8 +68,8 @@ define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
}
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64:
; SI: ds_sub_rtn_u64
; SI: s_endpgm
; GCN: ds_sub_rtn_u64
; GCN: s_endpgm
define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@ -75,8 +77,8 @@ define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
; SI: ds_sub_rtn_u64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_sub_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
@ -85,11 +87,11 @@ define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
}
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64:
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
; SI: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]],
; SI: s_endpgm
; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@ -97,8 +99,8 @@ define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
; SI: ds_dec_rtn_u64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_dec_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
@ -107,8 +109,8 @@ define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
}
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64:
; SI: ds_and_rtn_b64
; SI: s_endpgm
; GCN: ds_and_rtn_b64
; GCN: s_endpgm
define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@ -116,8 +118,8 @@ define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
; SI: ds_and_rtn_b64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_and_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
@ -126,8 +128,8 @@ define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
}
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64:
; SI: ds_or_rtn_b64
; SI: s_endpgm
; GCN: ds_or_rtn_b64
; GCN: s_endpgm
define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@ -135,8 +137,8 @@ define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %pt
}
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
; SI: ds_or_rtn_b64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_or_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
@ -145,8 +147,8 @@ define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(
}
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64:
; SI: ds_xor_rtn_b64
; SI: s_endpgm
; GCN: ds_xor_rtn_b64
; GCN: s_endpgm
define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@ -154,8 +156,8 @@ define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
; SI: ds_xor_rtn_b64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_xor_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
@ -172,8 +174,8 @@ define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
; }
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64:
; SI: ds_min_rtn_i64
; SI: s_endpgm
; GCN: ds_min_rtn_i64
; GCN: s_endpgm
define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@ -181,8 +183,8 @@ define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
; SI: ds_min_rtn_i64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_min_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
@ -191,8 +193,8 @@ define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
}
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64:
; SI: ds_max_rtn_i64
; SI: s_endpgm
; GCN: ds_max_rtn_i64
; GCN: s_endpgm
define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@ -200,8 +202,8 @@ define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
; SI: ds_max_rtn_i64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_max_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
@ -210,8 +212,8 @@ define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
}
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64:
; SI: ds_min_rtn_u64
; SI: s_endpgm
; GCN: ds_min_rtn_u64
; GCN: s_endpgm
define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@ -219,8 +221,8 @@ define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
}
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
; SI: ds_min_rtn_u64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_min_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
@ -229,8 +231,8 @@ define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac
}
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64:
; SI: ds_max_rtn_u64
; SI: s_endpgm
; GCN: ds_max_rtn_u64
; GCN: s_endpgm
define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@ -238,8 +240,8 @@ define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
}
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
; SI: ds_max_rtn_u64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_max_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
@ -248,16 +250,16 @@ define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64:
; SI: ds_wrxchg_rtn_b64
; SI: s_endpgm
; GCN: ds_wrxchg_rtn_b64
; GCN: s_endpgm
define void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
@ -265,8 +267,8 @@ define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64:
; SI: ds_add_u64
; SI: s_endpgm
; GCN: ds_add_u64
; GCN: s_endpgm
define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
@ -274,11 +276,12 @@ define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
; SI: s_endpgm
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
; GCN: s_endpgm
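; The only SI/VI difference in the checks above is the s_load_dword offset
; encoding for the kernel argument: SI takes a dword index (0x9), VI takes a
; byte offset (0x9 * 4 = 0x24); the rest of the sequence is common to both
; targets, hence the shared GCN prefix.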
define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i64 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
@ -286,18 +289,18 @@ define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64:
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
; SI: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; SI: s_endpgm
; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
; GCN: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: s_endpgm
define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
; SI: ds_inc_u64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_inc_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
@ -305,16 +308,16 @@ define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64:
; SI: ds_sub_u64
; SI: s_endpgm
; GCN: ds_sub_u64
; GCN: s_endpgm
define void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
; SI: ds_sub_u64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_sub_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
@ -322,18 +325,18 @@ define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64:
; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
; SI: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; SI: s_endpgm
; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
; GCN: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: s_endpgm
define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
; SI: ds_dec_u64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_dec_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
@ -341,16 +344,16 @@ define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64:
; SI: ds_and_b64
; SI: s_endpgm
; GCN: ds_and_b64
; GCN: s_endpgm
define void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
; SI: ds_and_b64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_and_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
@ -358,16 +361,16 @@ define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64:
; SI: ds_or_b64
; SI: s_endpgm
; GCN: ds_or_b64
; GCN: s_endpgm
define void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
; SI: ds_or_b64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_or_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
@ -375,16 +378,16 @@ define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64:
; SI: ds_xor_b64
; SI: s_endpgm
; GCN: ds_xor_b64
; GCN: s_endpgm
define void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
; SI: ds_xor_b64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_xor_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
@ -399,16 +402,16 @@ define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
; }
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64:
; SI: ds_min_i64
; SI: s_endpgm
; GCN: ds_min_i64
; GCN: s_endpgm
define void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
; SI: ds_min_i64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_min_i64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
@ -416,16 +419,16 @@ define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64:
; SI: ds_max_i64
; SI: s_endpgm
; GCN: ds_max_i64
; GCN: s_endpgm
define void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
; SI: ds_max_i64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_max_i64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
@ -433,16 +436,16 @@ define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64:
; SI: ds_min_u64
; SI: s_endpgm
; GCN: ds_min_u64
; GCN: s_endpgm
define void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
; SI: ds_min_u64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_min_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
@ -450,16 +453,16 @@ define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
}
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64:
; SI: ds_max_u64
; SI: s_endpgm
; GCN: ds_max_u64
; GCN: s_endpgm
define void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
; SI: ds_max_u64 {{.*}} offset:32
; SI: s_endpgm
; GCN: ds_max_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst

View File

@ -1,13 +1,16 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s
; Make sure there isn't an extra space between the instruction name and first operands.
; SI-LABEL: {{^}}add_f32:
; GCN-LABEL: {{^}}add_f32:
; SI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]]
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]]
; SI: buffer_store_dword [[RESULT]],
; VI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]]
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]]
; GCN: buffer_store_dword [[RESULT]],
define void @add_f32(float addrspace(1)* %out, float %a, float %b) {
%result = fadd float %a, %b
store float %result, float addrspace(1)* %out

View File

@ -1,6 +1,8 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
declare i32 @llvm.r600.read.tidig.x() nounwind readnone

View File

@ -1,10 +1,18 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=VI %s
; FUNC-LABEL: {{^}}cluster_arg_loads:
; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x9
; SI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xe
; VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x24
; VI-NEXT: s_nop 0
; VI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI-NEXT: s_nop 0
; VI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
; VI-NEXT: s_nop 0
; VI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x38
define void @cluster_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) nounwind {
store i32 %x, i32 addrspace(1)* %out0, align 4
store i32 %y, i32 addrspace(1)* %out1, align 4

View File

@ -1,4 +1,5 @@
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s
; When a frame index offset is more than 12-bits, make sure we don't store
; it in mubuf's offset field.
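; (A 12-bit field tops out at 0xfff, so an offset of 4096 bytes or more has to
; reach the instruction some way other than the immediate offset field.)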

View File

@ -1,4 +1,5 @@
;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s
;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s
;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
;FUNC-LABEL: {{^}}test_sdiv:
@ -35,39 +36,40 @@
;EG: BFE_UINT
;EG: BFE_UINT
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN: v_bfe_u32
;GCN-NOT: v_mad_f32
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
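;On VI the 64-bit shift uses the reversed mnemonic v_lshrrev_b64, which takes
;the shift amount as its first source operand, so the SI-NOT/VI-NOT pair above
;covers both spellings of the instruction that must not appear.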
define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = sdiv i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
@ -108,39 +110,40 @@ define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG: BFE_UINT
;EG: AND_INT {{.*}}, 1,
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN-NOT: v_mad_f32
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = urem i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
@ -151,10 +154,11 @@ define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: s_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
;GCN-NOT: s_bfe_u32
;GCN-NOT: v_mad_f32
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
define void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 33
%2 = ashr i64 %y, 33
@ -167,10 +171,11 @@ define void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: s_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
;GCN-NOT: s_bfe_u32
;GCN-NOT: v_mad_f32
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
define void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 33
%2 = ashr i64 %y, 33
@ -186,10 +191,11 @@ define void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: s_bfe_u32
;SI: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
;GCN-NOT: s_bfe_u32
;GCN: v_mad_f32
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
define void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 40
%2 = ashr i64 %y, 40
@ -205,10 +211,11 @@ define void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: s_bfe_u32
;SI: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
;GCN-NOT: s_bfe_u32
;GCN: v_mad_f32
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
define void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 40
%2 = ashr i64 %y, 40

View File

@ -1,12 +1,13 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
; SI-NOT: v_cmp
; SI: v_cmp_ne_i32_e32 vcc,
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; SI-NEXT:buffer_store_byte [[RESULT]]
; SI-NEXT: s_endpgm
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_i32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT:buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
@ -19,11 +20,11 @@ define void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
; SI-NOT: v_cmp
; SI: v_cmp_ne_i32_e32 vcc,
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; SI-NEXT: buffer_store_byte [[RESULT]]
; SI-NEXT: s_endpgm
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_i32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
@ -37,12 +38,12 @@ define void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
; This really folds away to false
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
; SI: v_cmp_eq_i32_e32 vcc,
; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
; SI-NEXT: v_cmp_eq_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
; SI-NEXT: buffer_store_byte [[TMP]]
; SI-NEXT: s_endpgm
; GCN: v_cmp_eq_i32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
; GCN-NEXT: v_cmp_eq_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
; GCN-NEXT: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
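; (sext of an i1 yields only 0 or -1, so the compare-with-1 above can never be
; true; the ne_1 variant below is always true for the same reason.)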
define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp eq i32 %a, %b
%ext = sext i1 %icmp0 to i32
@ -53,12 +54,12 @@ define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
; This really folds away to true
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
; SI: v_cmp_ne_i32_e32 vcc,
; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
; SI-NEXT: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
; SI-NEXT: buffer_store_byte [[TMP]]
; SI-NEXT: s_endpgm
; GCN: v_cmp_ne_i32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
; GCN-NEXT: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
; GCN-NEXT: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = sext i1 %icmp0 to i32
@ -68,11 +69,11 @@ define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
; SI-NOT: v_cmp
; SI: v_cmp_ne_i32_e32 vcc,
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; SI-NEXT: buffer_store_byte [[RESULT]]
; SI-NEXT: s_endpgm
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_i32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp eq i32 %a, %b
%ext = zext i1 %icmp0 to i32
@ -82,11 +83,11 @@ define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
; SI-NOT: v_cmp
; SI: v_cmp_ne_i32_e32 vcc,
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; SI-NEXT: buffer_store_byte [[RESULT]]
; SI-NEXT: s_endpgm
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_i32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = zext i1 %icmp0 to i32
@ -96,11 +97,11 @@ define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
; SI-NOT: v_cmp
; SI: v_cmp_eq_i32_e32 vcc,
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; SI-NEXT: buffer_store_byte [[RESULT]]
; SI-NEXT: s_endpgm
; GCN-NOT: v_cmp
; GCN: v_cmp_eq_i32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp eq i32 %a, %b
%ext = zext i1 %icmp0 to i32
@ -110,10 +111,10 @@ define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
; SI-NOT: v_cmp
; SI: v_cmp_eq_i32_e32 vcc,
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; SI-NEXT: buffer_store_byte [[RESULT]]
; GCN-NOT: v_cmp
; GCN: v_cmp_eq_i32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = zext i1 %icmp0 to i32
@ -125,11 +126,13 @@ define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
; SI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[VB]], 2{{$}}
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
; SI: buffer_store_byte
; SI: s_endpgm
; VI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[VB]], 2{{$}}
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
; GCN: buffer_store_byte
; GCN: s_endpgm
define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = sext i1 %icmp0 to i32
@ -139,12 +142,12 @@ define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
; SI: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; SI: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}}
; SI: v_cmp_ne_i32_e32 vcc, [[B]], [[K255]]
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; SI-NEXT: buffer_store_byte [[RESULT]]
; SI: s_endpgm
; GCN: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}}
; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[K255]]
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
%b.ext = zext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, 255
@ -153,11 +156,11 @@ define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
}
; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
; SI: buffer_load_sbyte [[B:v[0-9]+]]
; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
; SI-NEXT: buffer_store_byte [[RESULT]]
; SI: s_endpgm
; GCN: buffer_load_sbyte [[B:v[0-9]+]]
; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
%b = load i8 addrspace(1)* %b.ptr
%b.ext = sext i8 %b to i32
@ -167,11 +170,11 @@ define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nou
}
; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg:
; SI: s_load_dword [[B:s[0-9]+]]
; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
; SI-NEXT: buffer_store_byte [[RESULT]]
; SI: s_endpgm
; GCN: s_load_dword [[B:s[0-9]+]]
; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) nounwind {
%b.ext = sext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, -1
@ -184,12 +187,12 @@ define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) n
; Should do a buffer_load_sbyte and compare with -1
; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
; SI-DAG: buffer_load_ubyte [[B:v[0-9]+]]
; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}}
; SI: v_cmp_ne_i32_e32 vcc, [[B]], [[K]]{{$}}
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; SI-NEXT: buffer_store_byte [[RESULT]]
; SI: s_endpgm
; GCN-DAG: buffer_load_ubyte [[B:v[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}}
; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[K]]{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
%b.ext = sext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, -1
@ -198,9 +201,9 @@ define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
}
; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; SI: buffer_store_byte [[RESULT]]
; SI: s_endpgm
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
%b.ext = zext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, -1
@ -209,9 +212,9 @@ define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
}
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; SI: buffer_store_byte [[RESULT]]
; SI-NEXT: s_endpgm
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = zext i1 %icmp0 to i32
@ -221,9 +224,9 @@ define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; SI: buffer_store_byte [[RESULT]]
; SI-NEXT: s_endpgm
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = zext i1 %icmp0 to i32

View File

@ -1,8 +1,10 @@
; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s
; SMRD load with an immediate offset.
; CHECK-LABEL: {{^}}smrd0:
; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
; GCN-LABEL: {{^}}smrd0:
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
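; As with kernel arguments, the SMRD immediate offset is a dword index on SI
; but a byte offset on VI, so the dword-1 access above encodes as 0x1 on SI
; and as 0x4 (1 * 4 bytes) on VI. smrd1 below shows the same scaling at SI's
; limit: 0xff becomes 0x3fc.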
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 1
@ -12,8 +14,9 @@ entry:
}
; SMRD load with the largest possible immediate offset.
; CHECK-LABEL: {{^}}smrd1:
; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
; GCN-LABEL: {{^}}smrd1:
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 255
@ -23,10 +26,11 @@ entry:
}
; SMRD load with an offset greater than the largest possible immediate.
; CHECK-LABEL: {{^}}smrd2:
; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
; CHECK: s_endpgm
; GCN-LABEL: {{^}}smrd2:
; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
; GCN: s_endpgm
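; An index of 256 dwords is 0x400 bytes; that no longer fits SI's small dword
; immediate (0xff is the largest, per smrd1), so SI materializes the byte
; offset in an SGPR with s_movk_i32, while VI's wider byte-offset immediate
; still encodes 0x400 inline.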
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 256
@ -36,17 +40,18 @@ entry:
}
; SMRD load with a 64-bit offset
; CHECK-LABEL: {{^}}smrd3:
; GCN-LABEL: {{^}}smrd3:
; FIXME: There are too many copies here because we don't fold immediates
; through REG_SEQUENCE
; CHECK: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
; CHECK: s_mov_b32 s[[SHI:[0-9]+]], 4
; CHECK: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]]
; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]]
; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
; SI: s_mov_b32 s[[SHI:[0-9]+]], 4
; SI: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; FIXME: We should be able to use s_load_dword here
; CHECK: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
; CHECK: s_endpgm
; SI: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
; TODO: Add VI checks
; GCN: s_endpgm
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
@ -56,8 +61,9 @@ entry:
}
; SMRD load using the load.const intrinsic with an immediate offset
; CHECK-LABEL: {{^}}smrd_load_const0:
; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
; GCN-LABEL: {{^}}smrd_load_const0:
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
@ -69,8 +75,9 @@ main_body:
; SMRD load using the load.const intrinsic with the largest possible immediate
; offset.
; CHECK-LABEL: {{^}}smrd_load_const1:
; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
; GCN-LABEL: {{^}}smrd_load_const1:
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
@ -82,9 +89,10 @@ main_body:
; SMRD load using the load.const intrinsic with an offset greater than the
; largest possible immediate offset.
; CHECK-LABEL: {{^}}smrd_load_const2:
; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
; GCN-LABEL: {{^}}smrd_load_const2:
; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0

View File

@ -1,8 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}lshr_i32:
; SI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
@ -17,6 +19,9 @@ define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
@ -34,6 +39,11 @@ define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@ -49,6 +59,7 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
; FUNC-LABEL: {{^}}lshr_i64:
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
@ -74,6 +85,9 @@ define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
; EG-DAG: LSHL {{\*? *}}[[COMPSHA]]
@ -111,6 +125,11 @@ define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]

View File

@ -1,5 +1,5 @@
;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s
;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s
;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
;FUNC-LABEL: {{^}}test_udiv:
@ -36,39 +36,40 @@
;EG: BFE_UINT
;EG: BFE_UINT
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN-NOT: v_mad_f32
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = udiv i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
@ -109,39 +110,40 @@ define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG: BFE_UINT
;EG: AND_INT {{.*}}, 1,
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN: s_bfe_u32
;GCN-NOT: v_mad_f32
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = urem i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
@ -152,10 +154,11 @@ define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: s_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
;GCN-NOT: s_bfe_u32
;GCN-NOT: v_mad_f32
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
define void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 33
%2 = lshr i64 %y, 33
@ -168,10 +171,11 @@ define void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: s_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
;GCN-NOT: s_bfe_u32
;GCN-NOT: v_mad_f32
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: s_endpgm
define void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 33
%2 = lshr i64 %y, 33
@ -187,9 +191,10 @@ define void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: v_lshr_64
;SI: v_mad_f32
;SI: s_endpgm
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: v_mad_f32
;GCN: s_endpgm
define void @test_udiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 40
%2 = lshr i64 %y, 40
@ -205,9 +210,10 @@ define void @test_udiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: v_lshr_64
;SI: v_mad_f32
;SI: s_endpgm
;SI-NOT: v_lshr_b64
;VI-NOT: v_lshrrev_b64
;GCN: v_mad_f32
;GCN: s_endpgm
define void @test_urem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 40
%2 = lshr i64 %y, 40

View File

@ -1,80 +1,87 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1
; SI-LABEL: {{^}}test_sgpr_use_twice_binop:
; SI: s_load_dword [[SGPR:s[0-9]+]],
; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
; SI: buffer_store_dword [[RESULT]]
; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
; GCN: s_load_dword [[SGPR:s[0-9]+]],
; GCN: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
%dbl = fadd float %a, %a
store float %dbl, float addrspace(1)* %out, align 4
ret void
}
; SI-LABEL: {{^}}test_sgpr_use_three_ternary_op:
; SI: s_load_dword [[SGPR:s[0-9]+]],
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
; SI: buffer_store_dword [[RESULT]]
; GCN-LABEL: {{^}}test_sgpr_use_three_ternary_op:
; GCN: s_load_dword [[SGPR:s[0-9]+]],
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
%fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1
store float %fma, float addrspace(1)* %out, align 4
ret void
}
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
; SI: buffer_store_dword [[RESULT]]
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
%fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1
store float %fma, float addrspace(1)* %out, align 4
ret void
}
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
; SI: buffer_store_dword [[RESULT]]
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
%fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
store float %fma, float addrspace(1)* %out, align 4
ret void
}
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
; SI: buffer_store_dword [[RESULT]]
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
%fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
store float %fma, float addrspace(1)* %out, align 4
ret void
}
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
; SI: s_load_dword [[SGPR:s[0-9]+]]
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
; SI: buffer_store_dword [[RESULT]]
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
%fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
store float %fma, float addrspace(1)* %out, align 4
ret void
}
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
; SI: s_load_dword [[SGPR:s[0-9]+]]
; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
; SI: buffer_store_dword [[RESULT]]
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
%fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
store float %fma, float addrspace(1)* %out, align 4
@ -82,10 +89,10 @@ define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, fl
}
; Don't use fma since fma c, x, y is canonicalized to fma x, c, y
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
; SI: s_load_dword [[SGPR:s[0-9]+]]
; SI: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
; SI: buffer_store_dword [[RESULT]]
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
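; v_mad_i32_i24 keeps the literal 2 as the first source operand, which is the
; placement the fma canonicalization above would otherwise rewrite, and the
; check still sees the SGPR read twice.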
define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
%fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1
store i32 %fma, i32 addrspace(1)* %out, align 4

View File

@ -1,14 +1,15 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}ngroups_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].X
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @ngroups_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.x() #0
@ -21,8 +22,9 @@ entry:
; EG: MOV [[VAL]], KC0[0].Y
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @ngroups_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.y() #0
@ -35,8 +37,9 @@ entry:
; EG: MOV [[VAL]], KC0[0].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @ngroups_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.z() #0
@ -49,8 +52,9 @@ entry:
; EG: MOV [[VAL]], KC0[0].W
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @global_size_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.x() #0
@ -63,8 +67,9 @@ entry:
; EG: MOV [[VAL]], KC0[1].X
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @global_size_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.y() #0
@ -77,8 +82,9 @@ entry:
; EG: MOV [[VAL]], KC0[1].Y
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @global_size_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.z() #0
@ -91,8 +97,9 @@ entry:
; EG: MOV [[VAL]], KC0[1].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @local_size_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.x() #0
@ -105,8 +112,9 @@ entry:
; EG: MOV [[VAL]], KC0[1].W
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @local_size_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.y() #0
@ -119,8 +127,9 @@ entry:
; EG: MOV [[VAL]], KC0[2].X
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @local_size_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.z() #0
@ -133,8 +142,9 @@ entry:
; EG: MOV [[VAL]], KC0[2].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @get_work_dim (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.AMDGPU.read.workdim() #0
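A quick reader aid, not part of the test: every VI s_load_dword offset above is the matching SI offset scaled from dwords to bytes, i.e. multiplied by four. Worked out for the last two cases:

  local_size_z:  SI 0x8 dwords * 4 = VI 0x20 bytes
  get_work_dim:  SI 0xb dwords * 4 = VI 0x2c bytes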
@ -147,8 +157,8 @@ entry:
; kernel arguments, but this may change in the future.
; FUNC-LABEL: {{^}}tgid_x:
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
; SI: buffer_store_dword [[VVAL]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
; GCN: buffer_store_dword [[VVAL]]
define void @tgid_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.x() #0
@ -157,8 +167,8 @@ entry:
}
; FUNC-LABEL: {{^}}tgid_y:
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
; SI: buffer_store_dword [[VVAL]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
; GCN: buffer_store_dword [[VVAL]]
define void @tgid_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.y() #0
@ -167,8 +177,8 @@ entry:
}
; FUNC-LABEL: {{^}}tgid_z:
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
; SI: buffer_store_dword [[VVAL]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
; GCN: buffer_store_dword [[VVAL]]
define void @tgid_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.z() #0
@ -177,7 +187,7 @@ entry:
}
; FUNC-LABEL: {{^}}tidig_x:
; SI: buffer_store_dword v0
; GCN: buffer_store_dword v0
define void @tidig_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #0
@ -186,7 +196,7 @@ entry:
}
; FUNC-LABEL: {{^}}tidig_y:
; SI: buffer_store_dword v1
; GCN: buffer_store_dword v1
define void @tidig_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.y() #0
@ -195,7 +205,7 @@ entry:
}
; FUNC-LABEL: {{^}}tidig_z:
; SI: buffer_store_dword v2
; GCN: buffer_store_dword v2
define void @tidig_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.z() #0