mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-09-24 23:28:41 +00:00
R600/SI: Change all instruction assembly names to lowercase.
This matches the format produced by the AMD proprietary driver. //==================================================================// // Shell script for converting .ll test cases: (Pass the .ll files you want to convert to this script as arguments). //==================================================================// ; This was necessary on my system so that A-Z in sed would match only ; upper case. I'm not sure why. export LC_ALL='C' TEST_FILES="$*" MATCHES=`grep -v Patterns SIInstructions.td | grep -o '"[A-Z0-9_]\+["e]' | grep -o '[A-Z0-9_]\+' | sort -r` for f in $TEST_FILES; do # Check that there are SI tests: grep -q -e 'verde' -e 'bonaire' -e 'SI' -e 'tahiti' $f if [ $? -eq 0 ]; then for match in $MATCHES; do sed -i -e "s/\([ :]$match\)/\L\1/" $f done # Try to get check lines with partial instruction names sed -i 's/\(;[ ]*SI[A-Z\\-]*: \)\([A-Z_0-9]\+\)/\1\L\2/' $f fi done sed -i -e 's/bb0_1/BB0_1/g' ../../../test/CodeGen/R600/infinite-loop.ll sed -i -e 's/SI-NOT: bfe/SI-NOT: {{[^@]}}bfe/g'../../../test/CodeGen/R600/llvm.AMDGPU.bfe.*32.ll ../../../test/CodeGen/R600/sext-in-reg.ll sed -i -e 's/exp_IEEE/EXP_IEEE/g' ../../../test/CodeGen/R600/llvm.exp2.ll sed -i -e 's/numVgprs/NumVgprs/g' ../../../test/CodeGen/R600/register-count-comments.ll sed -i 's/\(; CHECK[-NOT]*: \)\([A-Z_0-9]\+\)/\1\L\2/' ../../../test/CodeGen/R600/select64.ll ../../../test/CodeGen/R600/sgpr-copy.ll //==================================================================// // Shell script for converting .td files (run this last) //==================================================================// export LC_ALL='C' sed -i -e '/Patterns/!s/\("[A-Z0-9_]\+[ "e]\)/\L\1/g' SIInstructions.td sed -i -e 's/"EXP/"exp/g' SIInstrInfo.td git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221350 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -298,7 +298,7 @@ class EXPCommon : InstSI<
|
||||
(outs),
|
||||
(ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
|
||||
VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
|
||||
"EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
|
||||
"exp $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
|
||||
[] > {
|
||||
|
||||
let EXP_CNT = 1;
|
||||
@@ -308,10 +308,10 @@ class EXPCommon : InstSI<
|
||||
multiclass EXP_m {
|
||||
|
||||
let isPseudo = 1 in {
|
||||
def "" : EXPCommon, SIMCInstr <"EXP", SISubtarget.NONE> ;
|
||||
def "" : EXPCommon, SIMCInstr <"exp", SISubtarget.NONE> ;
|
||||
}
|
||||
|
||||
def _si : EXPCommon, SIMCInstr <"EXP", SISubtarget.SI>, EXPe;
|
||||
def _si : EXPCommon, SIMCInstr <"exp", SISubtarget.SI>, EXPe;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -7,7 +7,7 @@
|
||||
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
|
||||
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
|
||||
; SI-CHECK: {{^}}v4i32_kernel_arg:
|
||||
; SI-CHECK: BUFFER_STORE_DWORDX4
|
||||
; SI-CHECK: buffer_store_dwordx4
|
||||
define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
|
||||
entry:
|
||||
store <4 x i32> %in, <4 x i32> addrspace(1)* %out
|
||||
@@ -20,7 +20,7 @@ entry:
|
||||
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
|
||||
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
|
||||
; SI-CHECK: {{^}}v4f32_kernel_arg:
|
||||
; SI-CHECK: BUFFER_STORE_DWORDX4
|
||||
; SI-CHECK: buffer_store_dwordx4
|
||||
define void @v4f32_kernel_arg(<4 x float> addrspace(1)* %out, <4 x float> %in) {
|
||||
entry:
|
||||
store <4 x float> %in, <4 x float> addrspace(1)* %out
|
||||
|
@@ -11,8 +11,8 @@
|
||||
; instructions with B64, U64, and I64 take 64-bit operands.
|
||||
|
||||
; FUNC-LABEL: {{^}}local_address_load:
|
||||
; CHECK: V_MOV_B32_e{{32|64}} [[PTR:v[0-9]]]
|
||||
; CHECK: DS_READ_B32 v{{[0-9]+}}, [[PTR]]
|
||||
; CHECK: v_mov_b32_e{{32|64}} [[PTR:v[0-9]]]
|
||||
; CHECK: ds_read_b32 v{{[0-9]+}}, [[PTR]]
|
||||
define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
|
||||
entry:
|
||||
%0 = load i32 addrspace(3)* %in
|
||||
@@ -21,9 +21,9 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}local_address_gep:
|
||||
; CHECK: S_ADD_I32 [[SPTR:s[0-9]]]
|
||||
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; CHECK: DS_READ_B32 [[VPTR]]
|
||||
; CHECK: s_add_i32 [[SPTR:s[0-9]]]
|
||||
; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; CHECK: ds_read_b32 [[VPTR]]
|
||||
define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
|
||||
entry:
|
||||
%0 = getelementptr i32 addrspace(3)* %in, i32 %offset
|
||||
@@ -33,8 +33,8 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}local_address_gep_const_offset:
|
||||
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
|
||||
; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 0x4,
|
||||
; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
|
||||
; CHECK: ds_read_b32 v{{[0-9]+}}, [[VPTR]], 0x4,
|
||||
define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
|
||||
entry:
|
||||
%0 = getelementptr i32 addrspace(3)* %in, i32 1
|
||||
@@ -45,9 +45,9 @@ entry:
|
||||
|
||||
; Offset too large, can't fold into 16-bit immediate offset.
|
||||
; FUNC-LABEL: {{^}}local_address_gep_large_const_offset:
|
||||
; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
|
||||
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; CHECK: DS_READ_B32 [[VPTR]]
|
||||
; CHECK: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
|
||||
; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; CHECK: ds_read_b32 [[VPTR]]
|
||||
define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
|
||||
entry:
|
||||
%0 = getelementptr i32 addrspace(3)* %in, i32 16385
|
||||
@@ -57,8 +57,8 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}null_32bit_lds_ptr:
|
||||
; CHECK: V_CMP_NE_I32
|
||||
; CHECK-NOT: V_CMP_NE_I32
|
||||
; CHECK: v_cmp_ne_i32
|
||||
; CHECK-NOT: v_cmp_ne_i32
|
||||
; CHECK: V_CNDMASK_B32
|
||||
define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) nounwind {
|
||||
%cmp = icmp ne i32 addrspace(3)* %lds, null
|
||||
@@ -68,9 +68,9 @@ define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds)
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}mul_32bit_ptr:
|
||||
; CHECK: V_MUL_LO_I32
|
||||
; CHECK-NEXT: V_ADD_I32_e32
|
||||
; CHECK-NEXT: DS_READ_B32
|
||||
; CHECK: v_mul_lo_i32
|
||||
; CHECK-NEXT: v_add_i32_e32
|
||||
; CHECK-NEXT: ds_read_b32
|
||||
define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %lds, i32 %tid) {
|
||||
%ptr = getelementptr [3 x float] addrspace(3)* %lds, i32 %tid, i32 0
|
||||
%val = load float addrspace(3)* %ptr
|
||||
@@ -81,8 +81,8 @@ define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %
|
||||
@g_lds = addrspace(3) global float zeroinitializer, align 4
|
||||
|
||||
; FUNC-LABEL: {{^}}infer_ptr_alignment_global_offset:
|
||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
|
||||
; CHECK: DS_READ_B32 v{{[0-9]+}}, [[REG]]
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0
|
||||
; CHECK: ds_read_b32 v{{[0-9]+}}, [[REG]]
|
||||
define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) {
|
||||
%val = load float addrspace(3)* @g_lds
|
||||
store float %val, float addrspace(1)* %out
|
||||
@@ -94,23 +94,23 @@ define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %ti
|
||||
@dst = addrspace(3) global [16384 x i32] zeroinitializer
|
||||
|
||||
; FUNC-LABEL: {{^}}global_ptr:
|
||||
; CHECK: DS_WRITE_B32
|
||||
; CHECK: ds_write_b32
|
||||
define void @global_ptr() nounwind {
|
||||
store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}local_address_store:
|
||||
; CHECK: DS_WRITE_B32
|
||||
; CHECK: ds_write_b32
|
||||
define void @local_address_store(i32 addrspace(3)* %out, i32 %val) {
|
||||
store i32 %val, i32 addrspace(3)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}local_address_gep_store:
|
||||
; CHECK: S_ADD_I32 [[SADDR:s[0-9]+]],
|
||||
; CHECK: V_MOV_B32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
|
||||
; CHECK: DS_WRITE_B32 [[ADDR]], v{{[0-9]+}},
|
||||
; CHECK: s_add_i32 [[SADDR:s[0-9]+]],
|
||||
; CHECK: v_mov_b32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
|
||||
; CHECK: ds_write_b32 [[ADDR]], v{{[0-9]+}},
|
||||
define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 %offset) {
|
||||
%gep = getelementptr i32 addrspace(3)* %out, i32 %offset
|
||||
store i32 %val, i32 addrspace(3)* %gep, align 4
|
||||
@@ -118,9 +118,9 @@ define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}local_address_gep_const_offset_store:
|
||||
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
|
||||
; CHECK: V_MOV_B32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
|
||||
; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 0x4
|
||||
; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
|
||||
; CHECK: v_mov_b32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
|
||||
; CHECK: ds_write_b32 [[VPTR]], [[VAL]], 0x4
|
||||
define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
|
||||
%gep = getelementptr i32 addrspace(3)* %out, i32 1
|
||||
store i32 %val, i32 addrspace(3)* %gep, align 4
|
||||
@@ -129,9 +129,9 @@ define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %v
|
||||
|
||||
; Offset too large, can't fold into 16-bit immediate offset.
|
||||
; FUNC-LABEL: {{^}}local_address_gep_large_const_offset_store:
|
||||
; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
|
||||
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; CHECK: DS_WRITE_B32 [[VPTR]], v{{[0-9]+}}, 0
|
||||
; CHECK: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
|
||||
; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; CHECK: ds_write_b32 [[VPTR]], v{{[0-9]+}}, 0
|
||||
define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
|
||||
%gep = getelementptr i32 addrspace(3)* %out, i32 16385
|
||||
store i32 %val, i32 addrspace(3)* %gep, align 4
|
||||
|
@@ -1,9 +1,9 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
|
||||
|
||||
; SI-CHECK: {{^}}f64_kernel_arg:
|
||||
; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
|
||||
; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
|
||||
; SI-CHECK: BUFFER_STORE_DWORDX2
|
||||
; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
|
||||
; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
|
||||
; SI-CHECK: buffer_store_dwordx2
|
||||
define void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
|
||||
entry:
|
||||
store double %in, double addrspace(1)* %out
|
||||
|
@@ -4,9 +4,9 @@
|
||||
;FUNC-LABEL: {{^}}test1:
|
||||
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
;SI-CHECK: V_ADD_I32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK-NOT: [[REG]]
|
||||
;SI-CHECK: BUFFER_STORE_DWORD [[REG]],
|
||||
;SI-CHECK: buffer_store_dword [[REG]],
|
||||
define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
|
||||
%a = load i32 addrspace(1)* %in
|
||||
@@ -20,8 +20,8 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
|
||||
@@ -38,10 +38,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
|
||||
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
|
||||
@@ -61,14 +61,14 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
|
||||
; EG-CHECK: ADD_INT
|
||||
; EG-CHECK: ADD_INT
|
||||
; EG-CHECK: ADD_INT
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
define void @test8(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
|
||||
entry:
|
||||
%0 = add <8 x i32> %a, %b
|
||||
@@ -93,22 +93,22 @@ entry:
|
||||
; EG-CHECK: ADD_INT
|
||||
; EG-CHECK: ADD_INT
|
||||
; EG-CHECK: ADD_INT
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
; SI-CHECK: s_add_i32
|
||||
define void @test16(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
|
||||
entry:
|
||||
%0 = add <16 x i32> %a, %b
|
||||
@@ -117,8 +117,8 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}add64:
|
||||
; SI-CHECK: S_ADD_U32
|
||||
; SI-CHECK: S_ADDC_U32
|
||||
; SI-CHECK: s_add_u32
|
||||
; SI-CHECK: s_addc_u32
|
||||
define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
|
||||
entry:
|
||||
%0 = add i64 %a, %b
|
||||
@@ -126,13 +126,13 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; The V_ADDC_U32 and V_ADD_I32 instruction can't read SGPRs, because they
|
||||
; The v_addc_u32 and v_add_i32 instruction can't read SGPRs, because they
|
||||
; use VCC. The test is designed so that %a will be stored in an SGPR and
|
||||
; %0 will be stored in a VGPR, so the comiler will be forced to copy %a
|
||||
; to a VGPR before doing the add.
|
||||
|
||||
; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
|
||||
; SI-CHECK-NOT: V_ADDC_U32_e32 s
|
||||
; SI-CHECK-NOT: v_addc_u32_e32 s
|
||||
define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
|
||||
entry:
|
||||
%0 = load i64 addrspace(1)* %in
|
||||
@@ -143,8 +143,8 @@ entry:
|
||||
|
||||
; Test i64 add inside a branch.
|
||||
; FUNC-LABEL: {{^}}add64_in_branch:
|
||||
; SI-CHECK: S_ADD_U32
|
||||
; SI-CHECK: S_ADDC_U32
|
||||
; SI-CHECK: s_add_u32
|
||||
; SI-CHECK: s_addc_u32
|
||||
define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
|
||||
entry:
|
||||
%0 = icmp eq i64 %a, 0
|
||||
|
@@ -4,8 +4,8 @@
|
||||
declare i32 @llvm.r600.read.tidig.x() readnone
|
||||
|
||||
; SI-LABEL: {{^}}test_i64_vreg:
|
||||
; SI: V_ADD_I32
|
||||
; SI: V_ADDC_U32
|
||||
; SI: v_add_i32
|
||||
; SI: v_addc_u32
|
||||
define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() readnone
|
||||
%a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid
|
||||
@@ -19,8 +19,8 @@ define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
|
||||
|
||||
; Check that the SGPR add operand is correctly moved to a VGPR.
|
||||
; SI-LABEL: {{^}}sgpr_operand:
|
||||
; SI: V_ADD_I32
|
||||
; SI: V_ADDC_U32
|
||||
; SI: v_add_i32
|
||||
; SI: v_addc_u32
|
||||
define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
|
||||
%foo = load i64 addrspace(1)* %in, align 8
|
||||
%result = add i64 %foo, %a
|
||||
@@ -32,8 +32,8 @@ define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noal
|
||||
; SGPR as other operand.
|
||||
;
|
||||
; SI-LABEL: {{^}}sgpr_operand_reversed:
|
||||
; SI: V_ADD_I32
|
||||
; SI: V_ADDC_U32
|
||||
; SI: v_add_i32
|
||||
; SI: v_addc_u32
|
||||
define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
|
||||
%foo = load i64 addrspace(1)* %in, align 8
|
||||
%result = add i64 %a, %foo
|
||||
@@ -43,10 +43,10 @@ define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace
|
||||
|
||||
|
||||
; SI-LABEL: {{^}}test_v2i64_sreg:
|
||||
; SI: S_ADD_U32
|
||||
; SI: S_ADDC_U32
|
||||
; SI: S_ADD_U32
|
||||
; SI: S_ADDC_U32
|
||||
; SI: s_add_u32
|
||||
; SI: s_addc_u32
|
||||
; SI: s_add_u32
|
||||
; SI: s_addc_u32
|
||||
define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a, <2 x i64> %b) {
|
||||
%result = add <2 x i64> %a, %b
|
||||
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
|
||||
@@ -54,10 +54,10 @@ define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a,
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_v2i64_vreg:
|
||||
; SI: V_ADD_I32
|
||||
; SI: V_ADDC_U32
|
||||
; SI: V_ADD_I32
|
||||
; SI: V_ADDC_U32
|
||||
; SI: v_add_i32
|
||||
; SI: v_addc_u32
|
||||
; SI: v_add_i32
|
||||
; SI: v_addc_u32
|
||||
define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() readnone
|
||||
%a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid
|
||||
@@ -70,12 +70,12 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}trunc_i64_add_to_i32:
|
||||
; SI: S_LOAD_DWORD s[[SREG0:[0-9]+]]
|
||||
; SI: S_LOAD_DWORD s[[SREG1:[0-9]+]]
|
||||
; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
|
||||
; SI-NOT: ADDC
|
||||
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
|
||||
; SI: BUFFER_STORE_DWORD [[VRESULT]],
|
||||
; SI: s_load_dword s[[SREG0:[0-9]+]]
|
||||
; SI: s_load_dword s[[SREG1:[0-9]+]]
|
||||
; SI: s_add_i32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
|
||||
; SI-NOT: addc
|
||||
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
|
||||
; SI: buffer_store_dword [[VRESULT]],
|
||||
define void @trunc_i64_add_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
|
||||
%add = add i64 %b, %a
|
||||
%trunc = trunc i64 %add to i32
|
||||
|
@@ -8,10 +8,10 @@
|
||||
; already in a VGPR after the first read.
|
||||
|
||||
; CHECK-LABEL: {{^}}do_as_ptr_calcs:
|
||||
; CHECK: S_LOAD_DWORD [[SREG1:s[0-9]+]],
|
||||
; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
|
||||
; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, [[VREG1]] offset:12
|
||||
; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:20
|
||||
; CHECK: s_load_dword [[SREG1:s[0-9]+]],
|
||||
; CHECK: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
|
||||
; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG1]] offset:12
|
||||
; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:20
|
||||
define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
|
||||
entry:
|
||||
%x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
|
||||
|
@@ -5,8 +5,8 @@
|
||||
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
|
||||
@@ -23,10 +23,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
|
||||
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
|
||||
@@ -38,7 +38,7 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_i32:
|
||||
; SI: S_AND_B32
|
||||
; SI: s_and_b32
|
||||
define void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
|
||||
%and = and i32 %a, %b
|
||||
store i32 %and, i32 addrspace(1)* %out, align 4
|
||||
@@ -46,7 +46,7 @@ define void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_constant_i32:
|
||||
; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
|
||||
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
|
||||
define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
|
||||
%and = and i32 %a, 1234567
|
||||
store i32 %and, i32 addrspace(1)* %out, align 4
|
||||
@@ -54,7 +54,7 @@ define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_and_i32:
|
||||
; SI: V_AND_B32
|
||||
; SI: v_and_b32
|
||||
define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
|
||||
%a = load i32 addrspace(1)* %aptr, align 4
|
||||
%b = load i32 addrspace(1)* %bptr, align 4
|
||||
@@ -64,7 +64,7 @@ define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addr
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_and_constant_i32:
|
||||
; SI: V_AND_B32
|
||||
; SI: v_and_b32
|
||||
define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
|
||||
%a = load i32 addrspace(1)* %aptr, align 4
|
||||
%and = and i32 %a, 1234567
|
||||
@@ -73,7 +73,7 @@ define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr)
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_i64:
|
||||
; SI: S_AND_B64
|
||||
; SI: s_and_b64
|
||||
define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
|
||||
%and = and i64 %a, %b
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
@@ -82,7 +82,7 @@ define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
|
||||
|
||||
; FIXME: Should use SGPRs
|
||||
; FUNC-LABEL: {{^}}s_and_i1:
|
||||
; SI: V_AND_B32
|
||||
; SI: v_and_b32
|
||||
define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
|
||||
%and = and i1 %a, %b
|
||||
store i1 %and, i1 addrspace(1)* %out
|
||||
@@ -90,7 +90,7 @@ define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_constant_i64:
|
||||
; SI: S_AND_B64
|
||||
; SI: s_and_b64
|
||||
define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
|
||||
%and = and i64 %a, 281474976710655
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
@@ -98,8 +98,8 @@ define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_and_i64:
|
||||
; SI: V_AND_B32
|
||||
; SI: V_AND_B32
|
||||
; SI: v_and_b32
|
||||
; SI: v_and_b32
|
||||
define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
|
||||
%a = load i64 addrspace(1)* %aptr, align 8
|
||||
%b = load i64 addrspace(1)* %bptr, align 8
|
||||
@@ -109,8 +109,8 @@ define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addr
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_and_i64_br:
|
||||
; SI: V_AND_B32
|
||||
; SI: V_AND_B32
|
||||
; SI: v_and_b32
|
||||
; SI: v_and_b32
|
||||
define void @v_and_i64_br(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i32 %cond) {
|
||||
entry:
|
||||
%tmp0 = icmp eq i32 %cond, 0
|
||||
@@ -129,8 +129,8 @@ endif:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_and_constant_i64:
|
||||
; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
|
||||
; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
|
||||
; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
|
||||
; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
|
||||
define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
|
||||
%a = load i64 addrspace(1)* %aptr, align 8
|
||||
%and = and i64 %a, 1234567
|
||||
@@ -140,8 +140,8 @@ define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr)
|
||||
|
||||
; FIXME: Replace and 0 with mov 0
|
||||
; FUNC-LABEL: {{^}}v_and_inline_imm_i64:
|
||||
; SI: V_AND_B32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
|
||||
; SI: V_AND_B32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
|
||||
; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
|
||||
; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
|
||||
define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
|
||||
%a = load i64 addrspace(1)* %aptr, align 8
|
||||
%and = and i64 %a, 64
|
||||
@@ -150,7 +150,7 @@ define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %apt
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_inline_imm_i64:
|
||||
; SI: S_AND_B64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
|
||||
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
|
||||
define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
|
||||
%and = and i64 %a, 64
|
||||
store i64 %and, i64 addrspace(1)* %out, align 8
|
||||
|
@@ -1,7 +1,7 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: {{^}}anyext_i1_i32:
|
||||
; CHECK: V_CNDMASK_B32_e64
|
||||
; CHECK: v_cndmask_b32_e64
|
||||
define void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) {
|
||||
entry:
|
||||
%0 = icmp eq i32 %cond, 0
|
||||
|
@@ -14,16 +14,16 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
|
||||
; FIXME: We end up with zero argument for ADD, because
|
||||
; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
|
||||
; with the appropriate offset. We should fold this into the store.
|
||||
; SI-ALLOCA: V_ADD_I32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
|
||||
; SI-ALLOCA: BUFFER_STORE_DWORD {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
|
||||
; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
|
||||
; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
|
||||
;
|
||||
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
|
||||
; alloca to a vector. It currently fails because it does not know how
|
||||
; to interpret:
|
||||
; getelementptr [4 x i32]* %alloca, i32 1, i32 %b
|
||||
|
||||
; SI-PROMOTE: V_ADD_I32_e32 [[PTRREG:v[0-9]+]], 16
|
||||
; SI-PROMOTE: DS_WRITE_B32 [[PTRREG]]
|
||||
; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], 16
|
||||
; SI-PROMOTE: ds_write_b32 [[PTRREG]]
|
||||
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
|
||||
%alloca = alloca [4 x i32], i32 4, align 16
|
||||
%tid = call i32 @llvm.SI.tid() readnone
|
||||
|
@@ -3,8 +3,8 @@
|
||||
declare i32 @llvm.SI.tid() readnone
|
||||
|
||||
; SI-LABEL: {{^}}test_array_ptr_calc:
|
||||
; SI: V_MUL_LO_I32
|
||||
; SI: V_MUL_HI_I32
|
||||
; SI: v_mul_lo_i32
|
||||
; SI: v_mul_hi_i32
|
||||
define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
|
||||
%tid = call i32 @llvm.SI.tid() readnone
|
||||
%a_ptr = getelementptr [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0
|
||||
|
@@ -2,13 +2,13 @@
|
||||
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
|
||||
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: S_LOAD_DWORD [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
|
||||
; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
|
||||
; SI: DS_CMPST_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
|
||||
; SI: S_ENDPGM
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
|
||||
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
|
||||
; SI: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
|
||||
; SI: s_endpgm
|
||||
define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
|
||||
@@ -18,17 +18,17 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset:
|
||||
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: S_LOAD_DWORDX2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI: S_MOV_B64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
|
||||
; SI-DAG: V_MOV_B32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
|
||||
; SI-DAG: V_MOV_B32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
|
||||
; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI-DAG: V_MOV_B32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
|
||||
; SI-DAG: V_MOV_B32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
|
||||
; SI: DS_CMPST_RTN_B64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
|
||||
; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
|
||||
; SI: S_ENDPGM
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI: s_mov_b64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
|
||||
; SI: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
|
||||
; SI: buffer_store_dwordx2 [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
|
||||
@@ -38,9 +38,9 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset
|
||||
; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
|
||||
; SI: S_ENDPGM
|
||||
; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; CI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
|
||||
; SI: s_endpgm
|
||||
define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
|
||||
%sub = sub i32 %a, %b
|
||||
%add = add i32 %sub, 4
|
||||
@@ -52,13 +52,13 @@ define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i3
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset:
|
||||
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
|
||||
; SI: S_LOAD_DWORD [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
|
||||
; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
|
||||
; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
|
||||
; SI: DS_CMPST_B32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
|
||||
; SI: S_ENDPGM
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
|
||||
; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
|
||||
; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
|
||||
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
|
||||
; SI: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
|
||||
; SI: s_endpgm
|
||||
define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
|
||||
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
|
||||
@@ -67,16 +67,16 @@ define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %sw
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset:
|
||||
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
|
||||
; SI: S_LOAD_DWORDX2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: S_MOV_B64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
|
||||
; SI-DAG: V_MOV_B32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
|
||||
; SI-DAG: V_MOV_B32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
|
||||
; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI-DAG: V_MOV_B32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
|
||||
; SI-DAG: V_MOV_B32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
|
||||
; SI: DS_CMPST_B64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
|
||||
; SI: S_ENDPGM
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
|
||||
; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_mov_b64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
|
||||
; SI: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
|
||||
; SI: s_endpgm
|
||||
define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
|
||||
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
|
||||
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
|
||||
|
@@ -3,7 +3,7 @@
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_add_local:
|
||||
; R600: LDS_ADD *
|
||||
; SI: DS_ADD_U32
|
||||
; SI: ds_add_u32
|
||||
define void @atomic_add_local(i32 addrspace(3)* %local) {
|
||||
%unused = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
|
||||
ret void
|
||||
@@ -11,7 +11,7 @@ define void @atomic_add_local(i32 addrspace(3)* %local) {
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_add_local_const_offset:
|
||||
; R600: LDS_ADD *
|
||||
; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
|
||||
%gep = getelementptr i32 addrspace(3)* %local, i32 4
|
||||
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
|
||||
@@ -20,7 +20,7 @@ define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_add_ret_local:
|
||||
; R600: LDS_ADD_RET *
|
||||
; SI: DS_ADD_RTN_U32
|
||||
; SI: ds_add_rtn_u32
|
||||
define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
|
||||
%val = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
|
||||
store i32 %val, i32 addrspace(1)* %out
|
||||
@@ -29,7 +29,7 @@ define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %loc
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_add_ret_local_const_offset:
|
||||
; R600: LDS_ADD_RET *
|
||||
; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
|
||||
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
|
||||
define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
|
||||
%gep = getelementptr i32 addrspace(3)* %local, i32 5
|
||||
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
|
||||
|
@@ -3,7 +3,7 @@
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_sub_local:
|
||||
; R600: LDS_SUB *
|
||||
; SI: DS_SUB_U32
|
||||
; SI: ds_sub_u32
|
||||
define void @atomic_sub_local(i32 addrspace(3)* %local) {
|
||||
%unused = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
|
||||
ret void
|
||||
@@ -11,7 +11,7 @@ define void @atomic_sub_local(i32 addrspace(3)* %local) {
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_sub_local_const_offset:
|
||||
; R600: LDS_SUB *
|
||||
; SI: DS_SUB_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||
define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
|
||||
%gep = getelementptr i32 addrspace(3)* %local, i32 4
|
||||
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
|
||||
@@ -20,7 +20,7 @@ define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_sub_ret_local:
|
||||
; R600: LDS_SUB_RET *
|
||||
; SI: DS_SUB_RTN_U32
|
||||
; SI: ds_sub_rtn_u32
|
||||
define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
|
||||
%val = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
|
||||
store i32 %val, i32 addrspace(1)* %out
|
||||
@@ -29,7 +29,7 @@ define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %loc
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_sub_ret_local_const_offset:
|
||||
; R600: LDS_SUB_RET *
|
||||
; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
|
||||
; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
|
||||
define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
|
||||
%gep = getelementptr i32 addrspace(3)* %local, i32 5
|
||||
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
|
||||
|
@@ -7,7 +7,7 @@
|
||||
; R600-CHECK: {{^}}bfi_def:
|
||||
; R600-CHECK: BFI_INT
|
||||
; SI-CHECK: @bfi_def
|
||||
; SI-CHECK: V_BFI_B32
|
||||
; SI-CHECK: v_bfi_b32
|
||||
define void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
|
||||
entry:
|
||||
%0 = xor i32 %x, -1
|
||||
@@ -23,7 +23,7 @@ entry:
|
||||
; R600-CHECK: {{^}}bfi_sha256_ch:
|
||||
; R600-CHECK: BFI_INT
|
||||
; SI-CHECK: @bfi_sha256_ch
|
||||
; SI-CHECK: V_BFI_B32
|
||||
; SI-CHECK: v_bfi_b32
|
||||
define void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
|
||||
entry:
|
||||
%0 = xor i32 %y, %z
|
||||
@@ -38,8 +38,8 @@ entry:
|
||||
; R600-CHECK: {{^}}bfi_sha256_ma:
|
||||
; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
|
||||
; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
|
||||
; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
|
||||
; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
|
||||
; SI-CHECK: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
|
||||
; SI-CHECK: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
|
||||
|
||||
define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
|
||||
entry:
|
||||
|
@@ -5,7 +5,7 @@
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
; FUNC-LABEL: {{^}}v32i8_to_v8i32:
|
||||
; SI: S_ENDPGM
|
||||
; SI: s_endpgm
|
||||
define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
|
||||
entry:
|
||||
%1 = load <32 x i8> addrspace(2)* %0
|
||||
@@ -18,7 +18,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
|
||||
; SI: S_ENDPGM
|
||||
; SI: s_endpgm
|
||||
define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
|
||||
entry:
|
||||
%0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
|
||||
@@ -56,7 +56,7 @@ define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nou
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
|
||||
; SI: S_ENDPGM
|
||||
; SI: s_endpgm
|
||||
define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
|
||||
%val = load <2 x i32> addrspace(1)* %in, align 8
|
||||
%add = add <2 x i32> %val, <i32 4, i32 9>
|
||||
@@ -66,7 +66,7 @@ define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
|
||||
; SI: S_ENDPGM
|
||||
; SI: s_endpgm
|
||||
define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
|
||||
%val = load double addrspace(1)* %in, align 8
|
||||
%add = fadd double %val, 4.0
|
||||
|
@@ -9,13 +9,13 @@ declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) nounwind readnone
|
||||
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone
|
||||
|
||||
; FUNC-LABEL: @test_bswap_i32
|
||||
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
|
||||
; SI-DAG: V_ALIGNBIT_B32 [[TMP0:v[0-9]+]], [[VAL]], [[VAL]], 8
|
||||
; SI-DAG: V_ALIGNBIT_B32 [[TMP1:v[0-9]+]], [[VAL]], [[VAL]], 24
|
||||
; SI-DAG: S_MOV_B32 [[K:s[0-9]+]], 0xff00ff
|
||||
; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[K]], [[TMP1]], [[TMP0]]
|
||||
; SI: BUFFER_STORE_DWORD [[RESULT]]
|
||||
; SI: S_ENDPGM
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; SI-DAG: v_alignbit_b32 [[TMP0:v[0-9]+]], [[VAL]], [[VAL]], 8
|
||||
; SI-DAG: v_alignbit_b32 [[TMP1:v[0-9]+]], [[VAL]], [[VAL]], 24
|
||||
; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0xff00ff
|
||||
; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[K]], [[TMP1]], [[TMP0]]
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
|
||||
%val = load i32 addrspace(1)* %in, align 4
|
||||
%bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
|
||||
@@ -24,13 +24,13 @@ define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_bswap_v2i32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_BFI_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_BFI_B32
|
||||
; SI: S_ENDPGM
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_bfi_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_bfi_b32
|
||||
; SI: s_endpgm
|
||||
define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
|
||||
%val = load <2 x i32> addrspace(1)* %in, align 8
|
||||
%bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
|
||||
@@ -39,19 +39,19 @@ define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_bswap_v4i32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_BFI_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_BFI_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_BFI_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_BFI_B32
|
||||
; SI: S_ENDPGM
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_bfi_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_bfi_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_bfi_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_bfi_b32
|
||||
; SI: s_endpgm
|
||||
define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
|
||||
%val = load <4 x i32> addrspace(1)* %in, align 16
|
||||
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
|
||||
@@ -60,31 +60,31 @@ define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_bswap_v8i32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_BFI_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_BFI_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_BFI_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_BFI_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_BFI_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_BFI_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_BFI_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_ALIGNBIT_B32
|
||||
; SI-DAG: V_BFI_B32
|
||||
; SI: S_ENDPGM
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_bfi_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_bfi_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_bfi_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_bfi_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_bfi_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_bfi_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_bfi_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_alignbit_b32
|
||||
; SI-DAG: v_bfi_b32
|
||||
; SI: s_endpgm
|
||||
define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
|
||||
%val = load <8 x i32> addrspace(1)* %in, align 32
|
||||
%bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone
|
||||
|
@@ -6,9 +6,9 @@
|
||||
; R600-CHECK: MOV
|
||||
; R600-CHECK-NOT: MOV
|
||||
; SI-CHECK: {{^}}build_vector2:
|
||||
; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]]], 5
|
||||
; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]]], 6
|
||||
; SI-CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[X]]:[[Y]]{{\]}}
|
||||
; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
|
||||
; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
|
||||
; SI-CHECK: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}}
|
||||
define void @build_vector2 (<2 x i32> addrspace(1)* %out) {
|
||||
entry:
|
||||
store <2 x i32> <i32 5, i32 6>, <2 x i32> addrspace(1)* %out
|
||||
@@ -22,11 +22,11 @@ entry:
|
||||
; R600-CHECK: MOV
|
||||
; R600-CHECK-NOT: MOV
|
||||
; SI-CHECK: {{^}}build_vector4:
|
||||
; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]]], 5
|
||||
; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]]], 6
|
||||
; SI-CHECK-DAG: V_MOV_B32_e32 v[[Z:[0-9]]], 7
|
||||
; SI-CHECK-DAG: V_MOV_B32_e32 v[[W:[0-9]]], 8
|
||||
; SI-CHECK: BUFFER_STORE_DWORDX4 v{{\[}}[[X]]:[[W]]{{\]}}
|
||||
; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
|
||||
; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
|
||||
; SI-CHECK-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7
|
||||
; SI-CHECK-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8
|
||||
; SI-CHECK: buffer_store_dwordx4 v{{\[}}[[X]]:[[W]]{{\]}}
|
||||
define void @build_vector4 (<4 x i32> addrspace(1)* %out) {
|
||||
entry:
|
||||
store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out
|
||||
|
@@ -8,8 +8,8 @@ target triple = "r600--"
|
||||
; OPT: mul nsw i32
|
||||
; OPT-NEXT: sext
|
||||
; SI-LLC-LABEL: {{^}}test:
|
||||
; SI-LLC: S_MUL_I32
|
||||
; SI-LLC-NOT: MUL
|
||||
; SI-LLC: s_mul_i32
|
||||
; SI-LLC-NOT: mul
|
||||
define void @test(i8 addrspace(1)* nocapture readonly %in, i32 %a, i8 %b) {
|
||||
entry:
|
||||
%0 = mul nsw i32 %a, 3
|
||||
|
@@ -4,9 +4,9 @@ declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
|
||||
; FUNC-LABEL: @commute_add_imm_fabs_f32
|
||||
; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: V_ADD_F32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
|
||||
; SI-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
|
||||
@@ -18,9 +18,9 @@ define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @commute_mul_imm_fneg_fabs_f32
|
||||
; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: V_MUL_F32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
|
||||
; SI-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
|
||||
@@ -33,9 +33,9 @@ define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrs
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @commute_mul_imm_fneg_f32
|
||||
; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: V_MUL_F32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
|
||||
; SI-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
|
||||
|
@@ -5,8 +5,8 @@
|
||||
; instructions that access scratch memory. Bit 23, which is the add_tid_enable
|
||||
; bit, is only set for scratch access, so we can check for the absence of this
|
||||
; value if we want to ensure scratch memory is not being used.
|
||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
|
||||
; SI-NOT: MOVREL
|
||||
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
|
||||
; SI-NOT: movrel
define void @test_concat_v1i32(<2 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
%concat = shufflevector <1 x i32> %a, <1 x i32> %b, <2 x i32> <i32 0, i32 1>
store <2 x i32> %concat, <2 x i32> addrspace(1)* %out, align 8
@@ -14,8 +14,8 @@ define void @test_concat_v1i32(<2 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x
}

; FUNC-LABEL: {{^}}test_concat_v2i32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2i32(<4 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%concat = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i32> %concat, <4 x i32> addrspace(1)* %out, align 16
@@ -23,8 +23,8 @@ define void @test_concat_v2i32(<4 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x
}

; FUNC-LABEL: {{^}}test_concat_v4i32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4i32(<8 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
%concat = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i32> %concat, <8 x i32> addrspace(1)* %out, align 32
@@ -32,8 +32,8 @@ define void @test_concat_v4i32(<8 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x
}

; FUNC-LABEL: {{^}}test_concat_v8i32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8i32(<16 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind {
%concat = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i32> %concat, <16 x i32> addrspace(1)* %out, align 64
@@ -41,8 +41,8 @@ define void @test_concat_v8i32(<16 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x
}

; FUNC-LABEL: {{^}}test_concat_v16i32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16i32(<32 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) nounwind {
%concat = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i32> %concat, <32 x i32> addrspace(1)* %out, align 128
@@ -50,8 +50,8 @@ define void @test_concat_v16i32(<32 x i32> addrspace(1)* %out, <16 x i32> %a, <1
}

; FUNC-LABEL: {{^}}test_concat_v1f32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1f32(<2 x float> addrspace(1)* %out, <1 x float> %a, <1 x float> %b) nounwind {
%concat = shufflevector <1 x float> %a, <1 x float> %b, <2 x i32> <i32 0, i32 1>
store <2 x float> %concat, <2 x float> addrspace(1)* %out, align 8
@@ -59,8 +59,8 @@ define void @test_concat_v1f32(<2 x float> addrspace(1)* %out, <1 x float> %a, <
}

; FUNC-LABEL: {{^}}test_concat_v2f32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2f32(<4 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%concat = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x float> %concat, <4 x float> addrspace(1)* %out, align 16
@@ -68,8 +68,8 @@ define void @test_concat_v2f32(<4 x float> addrspace(1)* %out, <2 x float> %a, <
}

; FUNC-LABEL: {{^}}test_concat_v4f32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4f32(<8 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%concat = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x float> %concat, <8 x float> addrspace(1)* %out, align 32
@@ -77,8 +77,8 @@ define void @test_concat_v4f32(<8 x float> addrspace(1)* %out, <4 x float> %a, <
}

; FUNC-LABEL: {{^}}test_concat_v8f32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8f32(<16 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%concat = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x float> %concat, <16 x float> addrspace(1)* %out, align 64
@@ -86,8 +86,8 @@ define void @test_concat_v8f32(<16 x float> addrspace(1)* %out, <8 x float> %a,
}

; FUNC-LABEL: {{^}}test_concat_v16f32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16f32(<32 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%concat = shufflevector <16 x float> %a, <16 x float> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x float> %concat, <32 x float> addrspace(1)* %out, align 128
@@ -95,8 +95,8 @@ define void @test_concat_v16f32(<32 x float> addrspace(1)* %out, <16 x float> %a
}

; FUNC-LABEL: {{^}}test_concat_v1i64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1i64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
%concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> <i32 0, i32 1>
store <2 x double> %concat, <2 x double> addrspace(1)* %out, align 16
@@ -104,8 +104,8 @@ define void @test_concat_v1i64(<2 x double> addrspace(1)* %out, <1 x double> %a,
}

; FUNC-LABEL: {{^}}test_concat_v2i64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2i64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x double> %concat, <4 x double> addrspace(1)* %out, align 32
@@ -113,8 +113,8 @@ define void @test_concat_v2i64(<4 x double> addrspace(1)* %out, <2 x double> %a,
}

; FUNC-LABEL: {{^}}test_concat_v4i64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4i64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x double> %concat, <8 x double> addrspace(1)* %out, align 64
@@ -122,8 +122,8 @@ define void @test_concat_v4i64(<8 x double> addrspace(1)* %out, <4 x double> %a,
}

; FUNC-LABEL: {{^}}test_concat_v8i64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8i64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x double> %concat, <16 x double> addrspace(1)* %out, align 128
@@ -131,8 +131,8 @@ define void @test_concat_v8i64(<16 x double> addrspace(1)* %out, <8 x double> %a
}

; FUNC-LABEL: {{^}}test_concat_v16i64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16i64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x double> %concat, <32 x double> addrspace(1)* %out, align 256
@@ -140,8 +140,8 @@ define void @test_concat_v16i64(<32 x double> addrspace(1)* %out, <16 x double>
}

; FUNC-LABEL: {{^}}test_concat_v1f64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1f64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
%concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> <i32 0, i32 1>
store <2 x double> %concat, <2 x double> addrspace(1)* %out, align 16
@@ -149,8 +149,8 @@ define void @test_concat_v1f64(<2 x double> addrspace(1)* %out, <1 x double> %a,
}

; FUNC-LABEL: {{^}}test_concat_v2f64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2f64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x double> %concat, <4 x double> addrspace(1)* %out, align 32
@@ -158,8 +158,8 @@ define void @test_concat_v2f64(<4 x double> addrspace(1)* %out, <2 x double> %a,
}

; FUNC-LABEL: {{^}}test_concat_v4f64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4f64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x double> %concat, <8 x double> addrspace(1)* %out, align 64
@@ -167,8 +167,8 @@ define void @test_concat_v4f64(<8 x double> addrspace(1)* %out, <4 x double> %a,
}

; FUNC-LABEL: {{^}}test_concat_v8f64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8f64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x double> %concat, <16 x double> addrspace(1)* %out, align 128
@@ -176,8 +176,8 @@ define void @test_concat_v8f64(<16 x double> addrspace(1)* %out, <8 x double> %a
}

; FUNC-LABEL: {{^}}test_concat_v16f64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16f64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x double> %concat, <32 x double> addrspace(1)* %out, align 256
@@ -185,8 +185,8 @@ define void @test_concat_v16f64(<32 x double> addrspace(1)* %out, <16 x double>
}

; FUNC-LABEL: {{^}}test_concat_v1i1:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1i1(<2 x i1> addrspace(1)* %out, <1 x i1> %a, <1 x i1> %b) nounwind {
%concat = shufflevector <1 x i1> %a, <1 x i1> %b, <2 x i32> <i32 0, i32 1>
store <2 x i1> %concat, <2 x i1> addrspace(1)* %out
@@ -194,8 +194,8 @@ define void @test_concat_v1i1(<2 x i1> addrspace(1)* %out, <1 x i1> %a, <1 x i1>
}

; FUNC-LABEL: {{^}}test_concat_v2i1:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2i1(<4 x i1> addrspace(1)* %out, <2 x i1> %a, <2 x i1> %b) nounwind {
%concat = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i1> %concat, <4 x i1> addrspace(1)* %out
@@ -203,8 +203,8 @@ define void @test_concat_v2i1(<4 x i1> addrspace(1)* %out, <2 x i1> %a, <2 x i1>
}

; FUNC-LABEL: {{^}}test_concat_v4i1:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4i1(<8 x i1> addrspace(1)* %out, <4 x i1> %a, <4 x i1> %b) nounwind {
%concat = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i1> %concat, <8 x i1> addrspace(1)* %out
@@ -212,8 +212,8 @@ define void @test_concat_v4i1(<8 x i1> addrspace(1)* %out, <4 x i1> %a, <4 x i1>
}

; FUNC-LABEL: {{^}}test_concat_v8i1:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8i1(<16 x i1> addrspace(1)* %out, <8 x i1> %a, <8 x i1> %b) nounwind {
%concat = shufflevector <8 x i1> %a, <8 x i1> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i1> %concat, <16 x i1> addrspace(1)* %out
@@ -221,8 +221,8 @@ define void @test_concat_v8i1(<16 x i1> addrspace(1)* %out, <8 x i1> %a, <8 x i1
}

; FUNC-LABEL: {{^}}test_concat_v16i1:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16i1(<32 x i1> addrspace(1)* %out, <16 x i1> %a, <16 x i1> %b) nounwind {
%concat = shufflevector <16 x i1> %a, <16 x i1> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i1> %concat, <32 x i1> addrspace(1)* %out
@@ -230,8 +230,8 @@ define void @test_concat_v16i1(<32 x i1> addrspace(1)* %out, <16 x i1> %a, <16 x
}

; FUNC-LABEL: {{^}}test_concat_v32i1:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v32i1(<64 x i1> addrspace(1)* %out, <32 x i1> %a, <32 x i1> %b) nounwind {
%concat = shufflevector <32 x i1> %a, <32 x i1> %b, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
store <64 x i1> %concat, <64 x i1> addrspace(1)* %out
@@ -239,8 +239,8 @@ define void @test_concat_v32i1(<64 x i1> addrspace(1)* %out, <32 x i1> %a, <32 x
}

; FUNC-LABEL: {{^}}test_concat_v1i16:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1i16(<2 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind {
%concat = shufflevector <1 x i16> %a, <1 x i16> %b, <2 x i32> <i32 0, i32 1>
store <2 x i16> %concat, <2 x i16> addrspace(1)* %out, align 4
@@ -248,8 +248,8 @@ define void @test_concat_v1i16(<2 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x
}

; FUNC-LABEL: {{^}}test_concat_v2i16:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2i16(<4 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) nounwind {
%concat = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i16> %concat, <4 x i16> addrspace(1)* %out, align 8
@@ -257,8 +257,8 @@ define void @test_concat_v2i16(<4 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x
}

; FUNC-LABEL: {{^}}test_concat_v4i16:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4i16(<8 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind {
%concat = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i16> %concat, <8 x i16> addrspace(1)* %out, align 16
@@ -266,8 +266,8 @@ define void @test_concat_v4i16(<8 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x
}

; FUNC-LABEL: {{^}}test_concat_v8i16:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8i16(<16 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind {
%concat = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i16> %concat, <16 x i16> addrspace(1)* %out, align 32
@@ -275,8 +275,8 @@ define void @test_concat_v8i16(<16 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x
}

; FUNC-LABEL: {{^}}test_concat_v16i16:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16i16(<32 x i16> addrspace(1)* %out, <16 x i16> %a, <16 x i16> %b) nounwind {
%concat = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i16> %concat, <32 x i16> addrspace(1)* %out, align 64

@@ -1,9 +1,9 @@
; RUN: llc -march=r600 -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; FUNC-LABEL: {{^}}test_copy_v4i8:
; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: S_ENDPGM
; SI: buffer_load_dword [[REG:v[0-9]+]]
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
@@ -11,10 +11,10 @@ define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)*
}

; FUNC-LABEL: {{^}}test_copy_v4i8_x2:
; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: S_ENDPGM
; SI: buffer_load_dword [[REG:v[0-9]+]]
; SI: buffer_store_dword [[REG]]
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
@@ -23,11 +23,11 @@ define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
}

; FUNC-LABEL: {{^}}test_copy_v4i8_x3:
; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: S_ENDPGM
; SI: buffer_load_dword [[REG:v[0-9]+]]
; SI: buffer_store_dword [[REG]]
; SI: buffer_store_dword [[REG]]
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
@@ -37,12 +37,12 @@ define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
}

; FUNC-LABEL: {{^}}test_copy_v4i8_x4:
; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: S_ENDPGM
; SI: buffer_load_dword [[REG:v[0-9]+]]
; SI: buffer_store_dword [[REG]]
; SI: buffer_store_dword [[REG]]
; SI: buffer_store_dword [[REG]]
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
@@ -53,33 +53,33 @@ define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
}

; FUNC-LABEL: {{^}}test_copy_v4i8_extra_use:
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI-DAG: V_ADD
; SI-DAG: V_ADD
; SI-DAG: V_ADD
; SI-DAG: V_ADD
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI_DAG: BUFFER_STORE_BYTE
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI-DAG: v_add
; SI-DAG: v_add
; SI-DAG: v_add
; SI-DAG: v_add
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI_DAG: buffer_store_byte

; After scalarizing v4i8 loads is fixed.
; XSI: BUFFER_LOAD_DWORD
; XSI: buffer_load_dword
; XSI: V_BFE
; XSI: V_ADD
; XSI: V_ADD
; XSI: V_ADD
; XSI: BUFFER_STORE_DWORD
; XSI: BUFFER_STORE_DWORD
; XSI: buffer_store_dword
; XSI: buffer_store_dword

; SI: S_ENDPGM
; SI: s_endpgm
define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
%add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
@@ -89,35 +89,35 @@ define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> add
}

; FUNC-LABEL: {{^}}test_copy_v4i8_x2_extra_use:
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI-DAG: V_ADD
; SI-DAG: V_ADD
; SI-DAG: V_ADD
; SI-DAG: V_ADD
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI_DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI_DAG: BUFFER_STORE_BYTE
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI-DAG: v_add
; SI-DAG: v_add
; SI-DAG: v_add
; SI-DAG: v_add
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI_DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI_DAG: buffer_store_byte

; XSI: BUFFER_LOAD_DWORD
; XSI: buffer_load_dword
; XSI: BFE
; XSI: BUFFER_STORE_DWORD
; XSI: buffer_store_dword
; XSI: V_ADD
; XSI: BUFFER_STORE_DWORD
; XSI-NEXT: BUFFER_STORE_DWORD
; XSI: buffer_store_dword
; XSI-NEXT: buffer_store_dword

; SI: S_ENDPGM
; SI: s_endpgm
define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
%add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
@@ -128,9 +128,9 @@ define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8>
}

; FUNC-LABEL: {{^}}test_copy_v3i8:
; SI-NOT: BFE
; SI-NOT: BFI
; SI: S_ENDPGM
; SI-NOT: bfe
; SI-NOT: bfi
; SI: s_endpgm
define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
%val = load <3 x i8> addrspace(1)* %in, align 4
store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 4
@@ -138,11 +138,11 @@ define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)*
}

; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_load:
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: S_ENDPGM
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: s_endpgm
define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load volatile <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
@@ -150,15 +150,15 @@ define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8>
}

; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_store:
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_STORE_BYTE
; SI: BUFFER_STORE_BYTE
; SI: BUFFER_STORE_BYTE
; SI: BUFFER_STORE_BYTE
; SI: S_ENDPGM
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: s_endpgm
define void @test_copy_v4i8_volatile_store(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store volatile <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4

@@ -6,11 +6,11 @@ declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone

; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i32:
; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
; SI: S_FLBIT_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
; SI: S_ENDPGM
; SI: s_load_dword [[VAL:s[0-9]+]],
; SI: s_flbit_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@@ -20,10 +20,10 @@ define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
}

; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_FFBH_U32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
@@ -34,11 +34,11 @@ define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
}

; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v2i32:
; SI: BUFFER_LOAD_DWORDX2
; SI: V_FFBH_U32_e32
; SI: V_FFBH_U32_e32
; SI: BUFFER_STORE_DWORDX2
; SI: S_ENDPGM
; SI: buffer_load_dwordx2
; SI: v_ffbh_u32_e32
; SI: v_ffbh_u32_e32
; SI: buffer_store_dwordx2
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
@@ -50,13 +50,13 @@ define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
}

; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v4i32:
; SI: BUFFER_LOAD_DWORDX4
; SI: V_FFBH_U32_e32
; SI: V_FFBH_U32_e32
; SI: V_FFBH_U32_e32
; SI: V_FFBH_U32_e32
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
; SI: buffer_load_dwordx4
; SI: v_ffbh_u32_e32
; SI: v_ffbh_u32_e32
; SI: v_ffbh_u32_e32
; SI: v_ffbh_u32_e32
; SI: buffer_store_dwordx4
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]

@@ -8,11 +8,11 @@ declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone

; FUNC-LABEL: {{^}}s_ctpop_i32:
; SI: S_LOAD_DWORD [[SVAL:s[0-9]+]],
; SI: S_BCNT1_I32_B32 [[SRESULT:s[0-9]+]], [[SVAL]]
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
; SI: S_ENDPGM
; SI: s_load_dword [[SVAL:s[0-9]+]],
; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm

; EG: BCNT_INT
define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@@ -23,11 +23,11 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {

; XXX - Why 0 in register?
; FUNC-LABEL: {{^}}v_ctpop_i32:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@@ -38,13 +38,13 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
}

; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL0:v[0-9]+]],
; SI: buffer_load_dword [[VAL1:v[0-9]+]],
; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
@@ -59,11 +59,11 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
}

; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
; SI-NEXT: S_WAITCNT
; SI-NEXT: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
; SI-NEXT: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL0:v[0-9]+]],
; SI-NEXT: s_waitcnt
; SI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
; SI-NEXT: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
%val0 = load i32 addrspace(1)* %in0, align 4
%ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
@@ -73,9 +73,9 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(
}

; FUNC-LABEL: {{^}}v_ctpop_v2i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
@@ -87,11 +87,11 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs
}

; FUNC-LABEL: {{^}}v_ctpop_v4i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
@@ -105,15 +105,15 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs
}

; FUNC-LABEL: {{^}}v_ctpop_v8i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
@@ -131,23 +131,23 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs
}

; FUNC-LABEL: {{^}}v_ctpop_v16i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
@@ -173,10 +173,10 @@ define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> ad
}

; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@@ -188,10 +188,10 @@ define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32
}

; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@@ -203,11 +203,11 @@ define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out,
}

; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_MOV_B32_e32 [[LIT:v[0-9]+]], 0x1869f
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@@ -217,11 +217,11 @@ define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspa
}

; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
@@ -233,11 +233,11 @@ define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1
}

; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
@@ -249,11 +249,11 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa
}

; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:0x10
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
; SI-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:0x10
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
@@ -270,11 +270,11 @@ define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrsp
; but there are some cases when the should be allowed.

; FUNC-LABEL: {{^}}ctpop_i32_in_br:
; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
; SI: S_BCNT1_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[RESULT]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[RESULT]], [[SRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: BCNT_INT
define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
entry:

@@ -7,11 +7,11 @@ declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>) nounwind readnone
declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone

; FUNC-LABEL: {{^}}s_ctpop_i64:
; SI: S_LOAD_DWORDX2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: S_BCNT1_I32_B64 [[SRESULT:s[0-9]+]], [[SVAL]]
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
; SI: S_ENDPGM
; SI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
%truncctpop = trunc i64 %ctpop to i32
@@ -20,12 +20,12 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
}

; FUNC-LABEL: {{^}}v_ctpop_i64:
; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%val = load i64 addrspace(1)* %in, align 8
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
@@ -35,9 +35,9 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali
}

; FUNC-LABEL: {{^}}s_ctpop_v2i64:
; SI: S_BCNT1_I32_B64
; SI: S_BCNT1_I32_B64
; SI: S_ENDPGM
; SI: s_bcnt1_i32_b64
; SI: s_bcnt1_i32_b64
; SI: s_endpgm
define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind {
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
%truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
@@ -46,11 +46,11 @@ define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val)
}

; FUNC-LABEL: {{^}}s_ctpop_v4i64:
; SI: S_BCNT1_I32_B64
; SI: S_BCNT1_I32_B64
; SI: S_BCNT1_I32_B64
; SI: S_BCNT1_I32_B64
; SI: S_ENDPGM
; SI: s_bcnt1_i32_b64
; SI: s_bcnt1_i32_b64
; SI: s_bcnt1_i32_b64
; SI: s_bcnt1_i32_b64
; SI: s_endpgm
define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind {
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
%truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
@@ -59,11 +59,11 @@ define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val)
}

; FUNC-LABEL: {{^}}v_ctpop_v2i64:
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: S_ENDPGM
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: s_endpgm
define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
%val = load <2 x i64> addrspace(1)* %in, align 16
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
@@ -73,15 +73,15 @@ define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrs
}

; FUNC-LABEL: {{^}}v_ctpop_v4i64:
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: S_ENDPGM
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: s_endpgm
define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
%val = load <4 x i64> addrspace(1)* %in, align 32
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
@@ -94,12 +94,12 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs
; but there are some cases when the should be allowed.

; FUNC-LABEL: {{^}}ctpop_i64_in_br:
; SI: S_LOAD_DWORDX2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
; SI: S_BCNT1_I32_B64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
; SI: V_MOV_B32_e32 v[[VLO:[0-9]+]], [[RESULT]]
; SI: V_MOV_B32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
; SI: S_ENDPGM
; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
; SI: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
; SI: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
; SI: s_endpgm
define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
entry:
%tmp0 = icmp eq i32 %cond, 0

@@ -6,11 +6,11 @@ declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone

; FUNC-LABEL: {{^}}s_cttz_zero_undef_i32:
; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
; SI: S_FF1_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
; SI: S_ENDPGM
; SI: s_load_dword [[VAL:s[0-9]+]],
; SI: s_ff1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@@ -20,10 +20,10 @@ define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
}

; FUNC-LABEL: {{^}}v_cttz_zero_undef_i32:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_FFBL_B32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_ffbl_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
@@ -34,11 +34,11 @@ define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
}

; FUNC-LABEL: {{^}}v_cttz_zero_undef_v2i32:
; SI: BUFFER_LOAD_DWORDX2
; SI: V_FFBL_B32_e32
; SI: V_FFBL_B32_e32
; SI: BUFFER_STORE_DWORDX2
; SI: S_ENDPGM
; SI: buffer_load_dwordx2
; SI: v_ffbl_b32_e32
; SI: v_ffbl_b32_e32
; SI: buffer_store_dwordx2
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
@@ -50,13 +50,13 @@ define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
}

; FUNC-LABEL: {{^}}v_cttz_zero_undef_v4i32:
; SI: BUFFER_LOAD_DWORDX4
; SI: V_FFBL_B32_e32
; SI: V_FFBL_B32_e32
; SI: V_FFBL_B32_e32
; SI: V_FFBL_B32_e32
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
; SI: buffer_load_dwordx4
; SI: v_ffbl_b32_e32
; SI: v_ffbl_b32_e32
; SI: v_ffbl_b32_e32
; SI: v_ffbl_b32_e32
; SI: buffer_store_dwordx4
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]

@@ -1,11 +1,11 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; SI-LABEL: {{^}}load_i8_to_f32:
; SI: BUFFER_LOAD_UBYTE [[LOADREG:v[0-9]+]],
; SI-NOT: BFE
; SI-NOT: LSHR
; SI: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
; SI: BUFFER_STORE_DWORD [[CONV]],
; SI: buffer_load_ubyte [[LOADREG:v[0-9]+]],
; SI-NOT: bfe
; SI-NOT: lshr
; SI: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
; SI: buffer_store_dword [[CONV]],
define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
%load = load i8 addrspace(1)* %in, align 1
%cvt = uitofp i8 %load to float
@@ -14,13 +14,13 @@ define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* n
}

; SI-LABEL: {{^}}load_v2i8_to_v2f32:
; SI: BUFFER_LOAD_USHORT [[LOADREG:v[0-9]+]],
; SI-NOT: BFE
; SI-NOT: LSHR
; SI-NOT: AND
; SI-DAG: V_CVT_F32_UBYTE1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
; SI: buffer_load_ushort [[LOADREG:v[0-9]+]],
; SI-NOT: bfe
; SI-NOT: lshr
; SI-NOT: and
; SI-DAG: v_cvt_f32_ubyte1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <2 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <2 x i8> %load to <2 x float>
@@ -29,12 +29,12 @@ define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8>
}

; SI-LABEL: {{^}}load_v3i8_to_v3f32:
; SI-NOT: BFE
; SI-NOT: V_CVT_F32_UBYTE3_e32
; SI-DAG: V_CVT_F32_UBYTE2_e32
; SI-DAG: V_CVT_F32_UBYTE1_e32
; SI-DAG: V_CVT_F32_UBYTE0_e32
; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
; SI-NOT: bfe
; SI-NOT: v_cvt_f32_ubyte3_e32
; SI-DAG: v_cvt_f32_ubyte2_e32
; SI-DAG: v_cvt_f32_ubyte1_e32
; SI-DAG: v_cvt_f32_ubyte0_e32
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <3 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <3 x i8> %load to <3 x float>
@@ -43,18 +43,18 @@ define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8>
}

; SI-LABEL: {{^}}load_v4i8_to_v4f32:
; We can't use BUFFER_LOAD_DWORD here, because the load is byte aligned, and
; BUFFER_LOAD_DWORD requires dword alignment.
; SI: BUFFER_LOAD_USHORT
; SI: BUFFER_LOAD_USHORT
; SI: V_OR_B32_e32 [[LOADREG:v[0-9]+]]
; SI-NOT: BFE
; SI-NOT: LSHR
; SI-DAG: V_CVT_F32_UBYTE3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, [[LOADREG]]
; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, [[LOADREG]]
; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: BUFFER_STORE_DWORDX4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
; We can't use buffer_load_dword here, because the load is byte aligned, and
; buffer_load_dword requires dword alignment.
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: v_or_b32_e32 [[LOADREG:v[0-9]+]]
; SI-NOT: bfe
; SI-NOT: lshr
; SI-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, [[LOADREG]]
; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, [[LOADREG]]
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <4 x i8> %load to <4 x float>
@@ -62,27 +62,27 @@ define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8>
ret void
}

; XXX - This should really still be able to use the V_CVT_F32_UBYTE0
; XXX - This should really still be able to use the v_cvt_f32_ubyte0
; for each component, but computeKnownBits doesn't handle vectors very
; well.

; SI-LABEL: {{^}}load_v4i8_to_v4f32_2_uses:
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: V_CVT_F32_UBYTE0_e32
; SI: V_CVT_F32_UBYTE0_e32
; SI: V_CVT_F32_UBYTE0_e32
; SI: V_CVT_F32_UBYTE0_e32
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: v_cvt_f32_ubyte0_e32
; SI: v_cvt_f32_ubyte0_e32
; SI: v_cvt_f32_ubyte0_e32
; SI: v_cvt_f32_ubyte0_e32

; XXX - replace with this when v4i8 loads aren't scalarized anymore.
; XSI: BUFFER_LOAD_DWORD
; XSI: V_CVT_F32_U32_e32
; XSI: V_CVT_F32_U32_e32
; XSI: V_CVT_F32_U32_e32
; XSI: V_CVT_F32_U32_e32
; SI: S_ENDPGM
; XSI: buffer_load_dword
; XSI: v_cvt_f32_u32_e32
; XSI: v_cvt_f32_u32_e32
; XSI: v_cvt_f32_u32_e32
; XSI: v_cvt_f32_u32_e32
; SI: s_endpgm
define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <4 x i8> %load to <4 x float>
@@ -94,7 +94,7 @@ define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <

; Make sure this doesn't crash.
; SI-LABEL: {{^}}load_v7i8_to_v7f32:
; SI: S_ENDPGM
; SI: s_endpgm
define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <7 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <7 x i8> %load to <7 x float>
@@ -103,27 +103,27 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
}

; SI-LABEL: {{^}}load_v8i8_to_v8f32:
; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
; SI-NOT: BFE
; SI-NOT: LSHR
; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-NOT: BFE
; SI-NOT: LSHR
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: buffer_load_dwordx2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
; SI-NOT: bfe
; SI-NOT: lshr
; SI-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-NOT: bfe
; SI-NOT: lshr
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <8 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <8 x i8> %load to <8 x float>
@@ -132,10 +132,10 @@ define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8>
}

; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
; SI: BUFFER_LOAD_DWORD [[LOADREG:v[0-9]+]],
; SI: V_ADD_I32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
; SI-NEXT: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[ADD]]
; SI: BUFFER_STORE_DWORD [[CONV]],
; SI: buffer_load_dword [[LOADREG:v[0-9]+]],
; SI: v_add_i32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
; SI: buffer_store_dword [[CONV]],
define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 2

@@ -7,21 +7,21 @@ declare void @llvm.AMDGPU.barrier.local() #1
; Function Attrs: nounwind
; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop:
; CHECK: BB0_1:
; CHECK: V_ADD_I32_e32 [[VADDR:v[0-9]+]],
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]]
; SI-DAG: V_ADD_I32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR4]]
; SI-DAG: V_ADD_I32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x80]]
; SI-DAG: V_ADD_I32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x84]]
; SI-DAG: V_ADD_I32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x100]]
; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]],
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]]
; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR4]]
; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]]
; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x84]]
; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]

; CI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
; CI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]] offset:256
; CHECK: S_ENDPGM
; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256
; CHECK: s_endpgm
define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 {
entry:
%x.i = tail call i32 @llvm.r600.read.tidig.x() #0

@@ -7,11 +7,11 @@
@lds.f64 = addrspace(3) global [512 x double] zeroinitializer, align 8

; SI-LABEL: @simple_read2_f32
; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2_f32(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -26,11 +26,11 @@ define void @simple_read2_f32(float addrspace(1)* %out) #0 {
}

; SI-LABEL: @simple_read2_f32_max_offset
; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -45,10 +45,10 @@ define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
}

; SI-LABEL: @simple_read2_f32_too_far
; SI-NOT DS_READ2_B32
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
; SI: S_ENDPGM
; SI-NOT ds_read2_b32
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
; SI: s_endpgm
define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -63,9 +63,9 @@ define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
}

; SI-LABEL: @simple_read2_f32_x2
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: S_ENDPGM
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 0
@@ -94,10 +94,10 @@ define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {

; Make sure there is an instruction between the two sets of reads.
; SI-LABEL: @simple_read2_f32_x2_barrier
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
; SI: S_BARRIER
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: S_ENDPGM
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
; SI: s_barrier
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 0
@@ -130,9 +130,9 @@ define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
; element results in only folding the inner pair.

; SI-LABEL: @simple_read2_f32_x2_nonzero_base
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: S_ENDPGM
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -166,10 +166,10 @@ define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
; register. We can't safely merge this.

; SI-LABEL: @read2_ptr_is_subreg_arg_f32
; SI-NOT: DS_READ2_B32
; SI: DS_READ_B32
; SI: DS_READ_B32
; SI: S_ENDPGM
; SI-NOT: ds_read2_b32
; SI: ds_read_b32
; SI: ds_read_b32
; SI: s_endpgm
define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
@@ -192,10 +192,10 @@ define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float ad
; subregisters.

; SI-LABEL: @read2_ptr_is_subreg_arg_offset_f32
; SI-NOT: DS_READ2_B32
; SI: DS_READ_B32
; SI: DS_READ_B32
; SI: S_ENDPGM
; SI-NOT: ds_read2_b32
; SI: ds_read_b32
; SI: ds_read_b32
; SI: s_endpgm
define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
@@ -217,10 +217,10 @@ define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x f
}

; We should be able to merge in this case, but probably not worth the effort.
; SI-NOT: DS_READ2_B32
; SI: DS_READ_B32
; SI: DS_READ_B32
; SI: S_ENDPGM
; SI-NOT: ds_read2_b32
; SI: ds_read_b32
; SI: ds_read_b32
; SI: s_endpgm
define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%ptr.0 = insertelement <2 x [512 x float] addrspace(3)*> undef, [512 x float] addrspace(3)* @lds, i32 0
@@ -241,10 +241,10 @@ define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
}

; SI-LABEL: @simple_read2_f32_volatile_0
; SI-NOT DS_READ2_B32
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
; SI: S_ENDPGM
; SI-NOT ds_read2_b32
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
; SI: s_endpgm
define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -259,10 +259,10 @@ define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
}

; SI-LABEL: @simple_read2_f32_volatile_1
; SI-NOT DS_READ2_B32
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
; SI: S_ENDPGM
; SI-NOT ds_read2_b32
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
; SI: s_endpgm
define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -280,8 +280,8 @@ define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
; XXX: This isn't really testing anything useful now. I think CI
; allows unaligned LDS accesses, which would be a problem here.
; SI-LABEL: @unaligned_read2_f32
; SI-NOT: DS_READ2_B32
; SI: S_ENDPGM
; SI-NOT: ds_read2_b32
; SI: s_endpgm
define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
@@ -296,8 +296,8 @@ define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %
}

; SI-LABEL: @misaligned_2_simple_read2_f32
; SI-NOT: DS_READ2_B32
; SI: S_ENDPGM
; SI-NOT: ds_read2_b32
; SI: s_endpgm
define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
@@ -312,11 +312,11 @@ define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrs
}

; SI-LABEL: @simple_read2_f64
; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
; SI: DS_READ2_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
; SI: S_ENDPGM
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2_f64(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
@@ -331,8 +331,8 @@ define void @simple_read2_f64(double addrspace(1)* %out) #0 {
}

; SI-LABEL: @simple_read2_f64_max_offset
; SI: DS_READ2_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
; SI: S_ENDPGM
; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
; SI: s_endpgm
define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
@@ -347,10 +347,10 @@ define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
}

; SI-LABEL: @simple_read2_f64_too_far
; SI-NOT DS_READ2_B64
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056
; SI: S_ENDPGM
; SI-NOT ds_read2_b64
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056
; SI: s_endpgm
define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
@@ -366,9 +366,9 @@ define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {

; Alignment only 4
; SI-LABEL: @misaligned_read2_f64
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
; SI: S_ENDPGM
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
; SI: s_endpgm
define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
@@ -385,8 +385,8 @@ define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)
@foo = addrspace(3) global [4 x i32] zeroinitializer, align 4

; SI-LABEL: @load_constant_adjacent_offsets
; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
%val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
%val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
@@ -396,8 +396,8 @@ define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
}

; SI-LABEL: @load_constant_disjoint_offsets
; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
%val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
%val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
@@ -409,9 +409,9 @@ define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
@bar = addrspace(3) global [4 x i64] zeroinitializer, align 4

; SI-LABEL: @load_misaligned64_constant_offsets
; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
%val0 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
%val1 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
@@ -423,11 +423,11 @@ define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
@bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4

; SI-LABEL: @load_misaligned64_constant_large_offsets
; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000
; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
; SI: S_ENDPGM
; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000
; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
; SI: s_endpgm
define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
%val0 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
%val1 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4

@@ -5,11 +5,11 @@

; SI-LABEL: @simple_read2st64_f32_0_1
; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -24,11 +24,11 @@ define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
}

; SI-LABEL: @simple_read2st64_f32_1_2
; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
@@ -44,11 +44,11 @@ define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(
}

; SI-LABEL: @simple_read2st64_f32_max_offset
; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
@@ -64,11 +64,11 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add
}

; SI-LABEL: @simple_read2st64_f32_over_max_offset
; SI-NOT: DS_READ2ST64_B32
; SI: DS_READ_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: V_ADD_I32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
; SI: DS_READ_B32 {{v[0-9]+}}, [[BIGADD]]
; SI: S_ENDPGM
; SI-NOT: ds_read2st64_b32
; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
@@ -84,8 +84,8 @@ define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, floa
}

; SI-LABEL: @odd_invalid_read2st64_f32_0
; SI-NOT: DS_READ2ST64_B32
; SI: S_ENDPGM
; SI-NOT: ds_read2st64_b32
; SI: s_endpgm
define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -100,8 +100,8 @@ define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
}

; SI-LABEL: @odd_invalid_read2st64_f32_1
; SI-NOT: DS_READ2ST64_B32
; SI: S_ENDPGM
; SI-NOT: ds_read2st64_b32
; SI: s_endpgm
define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
@@ -117,11 +117,11 @@ define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
}

; SI-LABEL: @simple_read2st64_f64_0_1
; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
@@ -136,11 +136,11 @@ define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
}

; SI-LABEL: @simple_read2st64_f64_1_2
; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
@@ -158,9 +158,9 @@ define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspac
; Alignment only

; SI-LABEL: @misaligned_read2st64_f64
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
; SI: S_ENDPGM
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
; SI: s_endpgm
define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
@@ -176,11 +176,11 @@ define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspac

; The maximum is not the usual 0xff because 0xff * 8 * 64 > 0xffff
; SI-LABEL: @simple_read2st64_f64_max_offset
; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 256
@@ -196,11 +196,11 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a
}

; SI-LABEL: @simple_read2st64_f64_over_max_offset
; SI-NOT: DS_READ2ST64_B64
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: V_ADD_I32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: S_ENDPGM
; SI-NOT: ds_read2st64_b64
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
@@ -216,8 +216,8 @@ define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, dou
}

; SI-LABEL: @invalid_read2st64_f64_odd_offset
; SI-NOT: DS_READ2ST64_B64
; SI: S_ENDPGM
; SI-NOT: ds_read2st64_b64
; SI: s_endpgm
define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
@@ -236,9 +236,9 @@ define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double
; stride in elements, not bytes, is a multiple of 64.

; SI-LABEL: @byte_size_only_divisible_64_read2_f64
; SI-NOT: DS_READ2ST_B64
; SI: DS_READ2_B64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
; SI: S_ENDPGM
; SI-NOT: ds_read2st_b64
; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
; SI: s_endpgm
define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i

@@ -5,10 +5,10 @@

; SI-LABEL: @simple_write2_one_val_f32
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
; SI: s_endpgm
define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
@@ -22,11 +22,11 @@ define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1
}

; SI-LABEL: @simple_write2_two_val_f32
; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
; SI: s_endpgm
define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
@@ -42,10 +42,10 @@ define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1
}

; SI-LABEL: @simple_write2_two_val_f32_volatile_0
; SI-NOT: DS_WRITE2_B32
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; SI: S_ENDPGM
; SI-NOT: ds_write2_b32
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; SI: s_endpgm
define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
@@ -61,10 +61,10 @@ define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float
}

; SI-LABEL: @simple_write2_two_val_f32_volatile_1
; SI-NOT: DS_WRITE2_B32
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; SI: S_ENDPGM
; SI-NOT: ds_write2_b32
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; SI: s_endpgm
define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
@@ -81,11 +81,11 @@ define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float

; 2 data subregisters from different super registers.
; SI-LABEL: @simple_write2_two_val_subreg2_mixed_f32
; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
; SI: BUFFER_LOAD_DWORDX2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
@@ -103,10 +103,10 @@ define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2
}

; SI-LABEL: @simple_write2_two_val_subreg2_f32
; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
@@ -122,10 +122,10 @@ define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x floa
}

; SI-LABEL: @simple_write2_two_val_subreg4_f32
; SI-DAG: BUFFER_LOAD_DWORDX4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: s_endpgm
define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr <4 x float> addrspace(1)* %in, i32 %x.i
@@ -141,11 +141,11 @@ define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x floa
}

; SI-LABEL: @simple_write2_two_val_max_offset_f32
; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
; SI: s_endpgm
define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
@@ -161,9 +161,9 @@ define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float
}

; SI-LABEL: @simple_write2_two_val_too_far_f32
; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
; SI: S_ENDPGM
; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
; SI: s_endpgm
define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
@@ -179,9 +179,9 @@ define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float add
}

; SI-LABEL: @simple_write2_two_val_f32_x2
; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: S_ENDPGM
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
@@ -209,9 +209,9 @@ define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspac
}

; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base
; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: S_ENDPGM
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
@@ -239,10 +239,10 @@ define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, f
}

; SI-LABEL: @write2_ptr_subreg_arg_two_val_f32
; SI-NOT: DS_WRITE2_B32
; SI: DS_WRITE_B32
; SI: DS_WRITE_B32
; SI: S_ENDPGM
; SI-NOT: ds_write2_b32
; SI: ds_write_b32
; SI: ds_write_b32
; SI: s_endpgm
define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
@@ -266,10 +266,10 @@ define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float add
}

; SI-LABEL: @simple_write2_one_val_f64
; SI: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]+:[0-9]+\]]],
; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: DS_WRITE2_B64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
; SI: s_endpgm
define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
@@ -283,11 +283,11 @@ define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace
}

; SI-LABEL: @misaligned_simple_write2_one_val_f64
; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0]
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0]
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0]
; SI: s_endpgm
define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
@@ -301,11 +301,11 @@ define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, doubl
}

; SI-LABEL: @simple_write2_two_val_f64
; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: DS_WRITE2_B64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
; SI: s_endpgm
define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
@@ -323,8 +323,8 @@ define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace
@foo = addrspace(3) global [4 x i32] zeroinitializer, align 4

; SI-LABEL: @store_constant_adjacent_offsets
; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
define void @store_constant_adjacent_offsets() {
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
@@ -332,9 +332,9 @@ define void @store_constant_adjacent_offsets() {
}

; SI-LABEL: @store_constant_disjoint_offsets
; SI-DAG: V_MOV_B32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
; SI-DAG: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: DS_WRITE2_B32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
define void @store_constant_disjoint_offsets() {
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
@@ -344,9 +344,9 @@ define void @store_constant_disjoint_offsets() {
@bar = addrspace(3) global [4 x i64] zeroinitializer, align 4

; SI-LABEL: @store_misaligned64_constant_offsets
; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
define void @store_misaligned64_constant_offsets() {
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
@@ -356,11 +356,11 @@ define void @store_misaligned64_constant_offsets() {
@bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4

; SI-LABEL: @store_misaligned64_constant_large_offsets
; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
; SI-DAG: DS_WRITE2_B32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI-DAG: DS_WRITE2_B32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: S_ENDPGM
; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: s_endpgm
define void @store_misaligned64_constant_large_offsets() {
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4

@@ -5,10 +5,10 @@

; SI-LABEL: @simple_write2st64_one_val_f32_0_1
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 [M0]
; SI: s_endpgm
define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
@@ -22,11 +22,11 @@ define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float add
}

; SI-LABEL: @simple_write2st64_two_val_f32_2_5
; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 [M0]
; SI: s_endpgm
define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
@@ -43,11 +43,11 @@ define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float add
}

; SI-LABEL: @simple_write2st64_two_val_max_offset_f32
; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
; SI: s_endpgm
define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
@@ -63,11 +63,11 @@ define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, fl
}

; SI-LABEL: @simple_write2st64_two_val_max_offset_f64
; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
; SI-DAG: V_ADD_I32_e32 [[VPTR:v[0-9]+]],
; SI: DS_WRITE2ST64_B64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
; SI-DAG: v_add_i32_e32 [[VPTR:v[0-9]+]],
; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 [M0]
; SI: s_endpgm
define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
@@ -84,9 +84,9 @@ define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, d
}

; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64
; SI-NOT: DS_WRITE2ST64_B64
; SI: DS_WRITE2_B64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
; SI: S_ENDPGM
; SI-NOT: ds_write2st64_b64
; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
; SI: s_endpgm
define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr double addrspace(1)* %in, i32 %x.i

@@ -54,9 +54,9 @@ define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 a
}

; FUNC-LABEL: {{^}}sextload_global_i8_to_i64:
; SI: BUFFER_LOAD_SBYTE [[LOAD:v[0-9]+]],
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: BUFFER_STORE_DWORDX2
; SI: buffer_load_sbyte [[LOAD:v[0-9]+]],
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%a = load i8 addrspace(1)* %in, align 8
%ext = sext i8 %a to i64
@@ -65,9 +65,9 @@ define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)*
}

; FUNC-LABEL: {{^}}sextload_global_i16_to_i64:
; SI: BUFFER_LOAD_SSHORT [[LOAD:v[0-9]+]],
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: BUFFER_STORE_DWORDX2
; SI: buffer_load_sshort [[LOAD:v[0-9]+]],
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%a = load i16 addrspace(1)* %in, align 8
%ext = sext i16 %a to i64
@@ -76,9 +76,9 @@ define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
}

; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: BUFFER_STORE_DWORDX2
; SI: buffer_load_dword [[LOAD:v[0-9]+]],
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%a = load i32 addrspace(1)* %in, align 8
%ext = sext i32 %a to i64
@@ -87,10 +87,10 @@ define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)
}

; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:
; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
; SI-DAG: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: BUFFER_STORE_DWORDX2
; SI-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; SI-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]],
; SI: v_mov_b32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: buffer_store_dwordx2
define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%a = load i8 addrspace(1)* %in, align 8
%ext = zext i8 %a to i64
@@ -99,10 +99,10 @@ define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)*
}

; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
; SI-DAG: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: BUFFER_STORE_DWORDX2
; SI-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; SI-DAG: buffer_load_ushort [[LOAD:v[0-9]+]],
; SI: v_mov_b32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: buffer_store_dwordx2
define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%a = load i16 addrspace(1)* %in, align 8
%ext = zext i16 %a to i64
@@ -111,10 +111,10 @@ define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
}

; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
; SI-DAG: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: BUFFER_STORE_DWORDX2
; SI-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; SI-DAG: buffer_load_dword [[LOAD:v[0-9]+]],
; SI: v_mov_b32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: buffer_store_dwordx2
define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%a = load i32 addrspace(1)* %in, align 8
%ext = zext i32 %a to i64

@@ -1,10 +1,10 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; FUNC-LABEL: {{^}}extract_vector_elt_v2i16:
; SI: BUFFER_LOAD_USHORT
; SI: BUFFER_LOAD_USHORT
; SI: BUFFER_STORE_SHORT
; SI: BUFFER_STORE_SHORT
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_store_short
; SI: buffer_store_short
define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) nounwind {
  %p0 = extractelement <2 x i16> %foo, i32 0
  %p1 = extractelement <2 x i16> %foo, i32 1
@@ -15,10 +15,10 @@ define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) no
}

; FUNC-LABEL: {{^}}extract_vector_elt_v4i16:
; SI: BUFFER_LOAD_USHORT
; SI: BUFFER_LOAD_USHORT
; SI: BUFFER_STORE_SHORT
; SI: BUFFER_STORE_SHORT
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_store_short
; SI: buffer_store_short
define void @extract_vector_elt_v4i16(i16 addrspace(1)* %out, <4 x i16> %foo) nounwind {
  %p0 = extractelement <4 x i16> %foo, i32 0
  %p1 = extractelement <4 x i16> %foo, i32 2

@@ -8,8 +8,8 @@ declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone

; FUNC-LABEL: {{^}}v_fabs_f64:
; SI: V_AND_B32
; SI: S_ENDPGM
; SI: v_and_b32
; SI: s_endpgm
define void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %tidext = sext i32 %tid to i64
@@ -21,9 +21,9 @@ define void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
}

; FUNC-LABEL: {{^}}fabs_f64:
; SI: V_AND_B32
; SI-NOT: V_AND_B32
; SI: S_ENDPGM
; SI: v_and_b32
; SI-NOT: v_and_b32
; SI: s_endpgm
define void @fabs_f64(double addrspace(1)* %out, double %in) {
  %fabs = call double @llvm.fabs.f64(double %in)
  store double %fabs, double addrspace(1)* %out
@@ -31,9 +31,9 @@ define void @fabs_f64(double addrspace(1)* %out, double %in) {
}

; FUNC-LABEL: {{^}}fabs_v2f64:
; SI: V_AND_B32
; SI: V_AND_B32
; SI: S_ENDPGM
; SI: v_and_b32
; SI: v_and_b32
; SI: s_endpgm
define void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
  %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
  store <2 x double> %fabs, <2 x double> addrspace(1)* %out
@@ -41,11 +41,11 @@ define void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
}

; FUNC-LABEL: {{^}}fabs_v4f64:
; SI: V_AND_B32
; SI: V_AND_B32
; SI: V_AND_B32
; SI: V_AND_B32
; SI: S_ENDPGM
; SI: v_and_b32
; SI: v_and_b32
; SI: v_and_b32
; SI: v_and_b32
; SI: s_endpgm
define void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
  %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
  store <4 x double> %fabs, <4 x double> addrspace(1)* %out
@@ -53,10 +53,10 @@ define void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
}

; SI-LABEL: {{^}}fabs_fold_f64:
; SI: S_LOAD_DWORDX2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: AND
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
; SI: S_ENDPGM
; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: and
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
; SI: s_endpgm
define void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
  %fabs = call double @llvm.fabs.f64(double %in0)
  %fmul = fmul double %fabs, %in1
@@ -65,10 +65,10 @@ define void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1)
}

; SI-LABEL: {{^}}fabs_fn_fold_f64:
; SI: S_LOAD_DWORDX2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: AND
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
; SI: S_ENDPGM
; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: and
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
; SI: s_endpgm
define void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
  %fabs = call double @fabs(double %in0)
  %fmul = fmul double %fabs, %in1
@@ -77,8 +77,8 @@ define void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in
}

; FUNC-LABEL: {{^}}fabs_free_f64:
; SI: V_AND_B32
; SI: S_ENDPGM
; SI: v_and_b32
; SI: s_endpgm
define void @fabs_free_f64(double addrspace(1)* %out, i64 %in) {
  %bc= bitcast i64 %in to double
  %fabs = call double @llvm.fabs.f64(double %bc)
@@ -87,8 +87,8 @@ define void @fabs_free_f64(double addrspace(1)* %out, i64 %in) {
}

; FUNC-LABEL: {{^}}fabs_fn_free_f64:
; SI: V_AND_B32
; SI: S_ENDPGM
; SI: v_and_b32
; SI: s_endpgm
define void @fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
  %bc= bitcast i64 %in to double
  %fabs = call double @fabs(double %bc)

@@ -10,7 +10,7 @@
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|

; SI: V_AND_B32
; SI: v_and_b32

define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
  %bc= bitcast i32 %in to float
@@ -23,7 +23,7 @@ define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|

; SI: V_AND_B32
; SI: v_and_b32

define void @fabs_free(float addrspace(1)* %out, i32 %in) {
  %bc= bitcast i32 %in to float
@@ -35,7 +35,7 @@ define void @fabs_free(float addrspace(1)* %out, i32 %in) {
; FUNC-LABEL: {{^}}fabs_f32:
; R600: |{{(PV|T[0-9])\.[XYZW]}}|

; SI: V_AND_B32
; SI: v_and_b32
define void @fabs_f32(float addrspace(1)* %out, float %in) {
  %fabs = call float @llvm.fabs.f32(float %in)
  store float %fabs, float addrspace(1)* %out
@@ -46,8 +46,8 @@ define void @fabs_f32(float addrspace(1)* %out, float %in) {
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|

; SI: V_AND_B32
; SI: V_AND_B32
; SI: v_and_b32
; SI: v_and_b32
define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
  store <2 x float> %fabs, <2 x float> addrspace(1)* %out
@@ -60,10 +60,10 @@ define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|

; SI: V_AND_B32
; SI: V_AND_B32
; SI: V_AND_B32
; SI: V_AND_B32
; SI: v_and_b32
; SI: v_and_b32
; SI: v_and_b32
; SI: v_and_b32
define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
  %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
  store <4 x float> %fabs, <4 x float> addrspace(1)* %out
@@ -71,9 +71,9 @@ define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
}

; SI-LABEL: {{^}}fabs_fn_fold:
; SI: S_LOAD_DWORD [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: AND
; SI: V_MUL_F32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: and
; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
  %fabs = call float @fabs(float %in0)
  %fmul = fmul float %fabs, %in1
@@ -82,9 +82,9 @@ define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
}

; SI-LABEL: {{^}}fabs_fold:
; SI: S_LOAD_DWORD [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: AND
; SI: V_MUL_F32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: and
; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
  %fabs = call float @llvm.fabs.f32(float %in0)
  %fmul = fmul float %fabs, %in1

@@ -3,7 +3,7 @@

; FUNC-LABEL: {{^}}fadd_f32:
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
; SI: V_ADD_F32
; SI: v_add_f32
define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
  %add = fadd float %a, %b
  store float %add, float addrspace(1)* %out, align 4
@@ -13,8 +13,8 @@ define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
; FUNC-LABEL: {{^}}fadd_v2f32:
; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: v_add_f32
; SI: v_add_f32
define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
  %add = fadd <2 x float> %a, %b
  store <2 x float> %add, <2 x float> addrspace(1)* %out, align 8
@@ -26,10 +26,10 @@ define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x flo
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float> addrspace(1)* %in, align 16
@@ -48,14 +48,14 @@ define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)
; R600: ADD
; R600: ADD
; R600: ADD
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
define void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) {
  %add = fadd <8 x float> %a, %b
  store <8 x float> %add, <8 x float> addrspace(1)* %out, align 32

@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s

; CHECK: {{^}}fadd_f64:
; CHECK: V_ADD_F64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}
; CHECK: v_add_f64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}

define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {

@@ -9,7 +9,7 @@ declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone
declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone

; FUNC-LABEL: {{^}}fceil_f32:
; SI: V_CEIL_F32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: CEIL {{\*? *}}[[RESULT]]
define void @fceil_f32(float addrspace(1)* %out, float %x) {
@@ -19,8 +19,8 @@ define void @fceil_f32(float addrspace(1)* %out, float %x) {
}

; FUNC-LABEL: {{^}}fceil_v2f32:
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
@@ -31,9 +31,9 @@ define void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
}

; FUNC-LABEL: {{^}}fceil_v3f32:
; FIXME-SI: V_CEIL_F32_e32
; FIXME-SI: V_CEIL_F32_e32
; FIXME-SI: V_CEIL_F32_e32
; FIXME-SI: v_ceil_f32_e32
; FIXME-SI: v_ceil_f32_e32
; FIXME-SI: v_ceil_f32_e32
; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
@@ -47,10 +47,10 @@ define void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
}

; FUNC-LABEL: {{^}}fceil_v4f32:
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
@@ -63,14 +63,14 @@ define void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
}

; FUNC-LABEL: {{^}}fceil_v8f32:
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
@@ -88,22 +88,22 @@ define void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
}

; FUNC-LABEL: {{^}}fceil_v16f32:
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}}

@@ -9,25 +9,25 @@ declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone

; FUNC-LABEL: {{^}}fceil_f64:
; CI: V_CEIL_F64_e32
; SI: S_BFE_U32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: S_LSHR_B64
; SI: S_NOT_B64
; SI: S_AND_B64
; SI-DAG: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI-DAG: CMP_LT_I32
; SI: CNDMASK_B32
; SI: CNDMASK_B32
; SI: CMP_GT_I32
; SI: CNDMASK_B32
; SI: CNDMASK_B32
; SI: CMP_GT_F64
; SI: CNDMASK_B32
; SI: CMP_NE_I32
; SI: CNDMASK_B32
; SI: CNDMASK_B32
; SI: V_ADD_F64
; CI: v_ceil_f64_e32
; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: s_lshr_b64
; SI: s_not_b64
; SI: s_and_b64
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI-DAG: cmp_lt_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: cmp_gt_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: cmp_gt_f64
; SI: cndmask_b32
; SI: cmp_ne_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: v_add_f64
define void @fceil_f64(double addrspace(1)* %out, double %x) {
  %y = call double @llvm.ceil.f64(double %x) nounwind readnone
  store double %y, double addrspace(1)* %out
@@ -35,8 +35,8 @@ define void @fceil_f64(double addrspace(1)* %out, double %x) {
}

; FUNC-LABEL: {{^}}fceil_v2f64:
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
  %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone
  store <2 x double> %y, <2 x double> addrspace(1)* %out
@@ -44,9 +44,9 @@ define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
}

; FIXME-FUNC-LABEL: {{^}}fceil_v3f64:
; FIXME-CI: V_CEIL_F64_e32
; FIXME-CI: V_CEIL_F64_e32
; FIXME-CI: V_CEIL_F64_e32
; FIXME-CI: v_ceil_f64_e32
; FIXME-CI: v_ceil_f64_e32
; FIXME-CI: v_ceil_f64_e32
; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
;   %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone
;   store <3 x double> %y, <3 x double> addrspace(1)* %out
@@ -54,10 +54,10 @@ define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
; }

; FUNC-LABEL: {{^}}fceil_v4f64:
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
  %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
  store <4 x double> %y, <4 x double> addrspace(1)* %out
@@ -65,14 +65,14 @@ define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
}

; FUNC-LABEL: {{^}}fceil_v8f64:
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
  %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone
  store <8 x double> %y, <8 x double> addrspace(1)* %out
@@ -80,22 +80,22 @@ define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
}

; FUNC-LABEL: {{^}}fceil_v16f64:
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
  %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
  store <16 x double> %y, <16 x double> addrspace(1)* %out

@@ -1,7 +1,7 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
|
||||
|
||||
; CHECK: {{^}}flt_f64:
|
||||
; CHECK: V_CMP_LT_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
|
||||
; CHECK: v_cmp_lt_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
|
||||
|
||||
define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
double addrspace(1)* %in2) {
|
||||
@@ -14,7 +14,7 @@ define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
}
|
||||
|
||||
; CHECK: {{^}}fle_f64:
|
||||
; CHECK: V_CMP_LE_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
|
||||
; CHECK: v_cmp_le_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
|
||||
|
||||
define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
double addrspace(1)* %in2) {
|
||||
@@ -27,7 +27,7 @@ define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
}
|
||||
|
||||
; CHECK: {{^}}fgt_f64:
|
||||
; CHECK: V_CMP_GT_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
|
||||
; CHECK: v_cmp_gt_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
|
||||
|
||||
define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
double addrspace(1)* %in2) {
|
||||
@@ -40,7 +40,7 @@ define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
}
|
||||
|
||||
; CHECK: {{^}}fge_f64:
|
||||
; CHECK: V_CMP_GE_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
|
||||
; CHECK: v_cmp_ge_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
|
||||
|
||||
define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
double addrspace(1)* %in2) {
|
||||
@@ -53,7 +53,7 @@ define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
}
|
||||
|
||||
; CHECK: {{^}}fne_f64:
|
||||
; CHECK: V_CMP_NEQ_F64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
|
||||
; CHECK: v_cmp_neq_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
|
||||
|
||||
define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
double addrspace(1)* %in2) {
|
||||
@@ -66,7 +66,7 @@ define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
}
|
||||
|
||||
; CHECK: {{^}}feq_f64:
|
||||
; CHECK: V_CMP_EQ_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
|
||||
; CHECK: v_cmp_eq_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
|
||||
|
||||
define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
double addrspace(1)* %in2) {
|
||||
|
@@ -1,8 +1,8 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
|
||||
|
||||
; CHECK: {{^}}fconst_f64:
|
||||
; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0x40140000
|
||||
; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0
|
||||
; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0x40140000
|
||||
; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0
|
||||
|
||||
define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
|
||||
%r1 = load double addrspace(1)* %in
|
||||
|
@@ -8,14 +8,14 @@ declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind read
|
||||
|
||||
; Try to identify arg based on higher address.
|
||||
; FUNC-LABEL: {{^}}test_copysign_f32:
|
||||
; SI: S_LOAD_DWORD [[SMAG:s[0-9]+]], {{.*}} 0xb
|
||||
; SI: S_LOAD_DWORD [[SSIGN:s[0-9]+]], {{.*}} 0xc
|
||||
; SI-DAG: V_MOV_B32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
|
||||
; SI-DAG: V_MOV_B32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
|
||||
; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
|
||||
; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
|
||||
; SI: BUFFER_STORE_DWORD [[RESULT]],
|
||||
; SI: S_ENDPGM
|
||||
; SI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0xb
|
||||
; SI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0xc
|
||||
; SI-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
|
||||
; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
|
||||
; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
|
||||
; EG: BFI_INT
|
||||
define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind {
|
||||
@@ -25,7 +25,7 @@ define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_copysign_v2f32:
|
||||
; SI: S_ENDPGM
|
||||
; SI: s_endpgm
|
||||
|
||||
; EG: BFI_INT
|
||||
; EG: BFI_INT
|
||||
@@ -36,7 +36,7 @@ define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %ma
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_copysign_v4f32:
|
||||
; SI: S_ENDPGM
|
||||
; SI: s_endpgm
|
||||
|
||||
; EG: BFI_INT
|
||||
; EG: BFI_INT
|
||||
|
@@ -5,15 +5,15 @@ declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind r
|
||||
declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind readnone
|
||||
|
||||
; FUNC-LABEL: {{^}}test_copysign_f64:
|
||||
; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI-DAG: V_MOV_B32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
|
||||
; SI-DAG: V_MOV_B32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
|
||||
; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
|
||||
; SI: V_BFI_B32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
|
||||
; SI: V_MOV_B32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
|
||||
; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
|
||||
; SI: S_ENDPGM
|
||||
; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
|
||||
; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
|
||||
; SI: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
|
||||
; SI: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
|
||||
; SI: s_endpgm
|
||||
define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind {
|
||||
%result = call double @llvm.copysign.f64(double %mag, double %sign)
|
||||
store double %result, double addrspace(1)* %out, align 8
|
||||
@@ -21,7 +21,7 @@ define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %s
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_copysign_v2f64:
|
||||
; SI: S_ENDPGM
|
||||
; SI: s_endpgm
|
||||
define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind {
|
||||
%result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
|
||||
store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8
|
||||
@@ -29,7 +29,7 @@ define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_copysign_v4f64:
|
||||
; SI: S_ENDPGM
|
||||
; SI: s_endpgm
|
||||
define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind {
|
||||
%result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
|
||||
store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8
|
||||
|
@@ -11,8 +11,8 @@
|
||||
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
|
||||
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
|
||||
|
||||
; SI-DAG: V_RCP_F32
|
||||
; SI-DAG: V_MUL_F32
|
||||
; SI-DAG: v_rcp_f32
|
||||
; SI-DAG: v_mul_f32
|
||||
define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) {
|
||||
entry:
|
||||
%0 = fdiv float %a, %b
|
||||
@@ -28,10 +28,10 @@ entry:
|
||||
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
|
||||
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
|
||||
|
||||
; SI-DAG: V_RCP_F32
|
||||
; SI-DAG: V_MUL_F32
|
||||
; SI-DAG: V_RCP_F32
|
||||
; SI-DAG: V_MUL_F32
|
||||
; SI-DAG: v_rcp_f32
|
||||
; SI-DAG: v_mul_f32
|
||||
; SI-DAG: v_rcp_f32
|
||||
; SI-DAG: v_mul_f32
|
||||
define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
|
||||
entry:
|
||||
%0 = fdiv <2 x float> %a, %b
|
||||
@@ -49,14 +49,14 @@ entry:
|
||||
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
|
||||
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
|
||||
|
||||
; SI-DAG: V_RCP_F32
|
||||
; SI-DAG: V_MUL_F32
|
||||
; SI-DAG: V_RCP_F32
|
||||
; SI-DAG: V_MUL_F32
|
||||
; SI-DAG: V_RCP_F32
|
||||
; SI-DAG: V_MUL_F32
|
||||
; SI-DAG: V_RCP_F32
|
||||
; SI-DAG: V_MUL_F32
|
||||
; SI-DAG: v_rcp_f32
|
||||
; SI-DAG: v_mul_f32
|
||||
; SI-DAG: v_rcp_f32
|
||||
; SI-DAG: v_mul_f32
|
||||
; SI-DAG: v_rcp_f32
|
||||
; SI-DAG: v_mul_f32
|
||||
; SI-DAG: v_rcp_f32
|
||||
; SI-DAG: v_mul_f32
|
||||
define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
|
||||
%a = load <4 x float> addrspace(1) * %in
|
||||
|
@@ -1,8 +1,8 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
|
||||
|
||||
; CHECK: {{^}}fdiv_f64:
|
||||
; CHECK: V_RCP_F64_e32 {{v\[[0-9]+:[0-9]+\]}}
|
||||
; CHECK: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
|
||||
; CHECK: v_rcp_f64_e32 {{v\[[0-9]+:[0-9]+\]}}
|
||||
; CHECK: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
|
||||
|
||||
define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
double addrspace(1)* %in2) {
|
||||
|
@@ -9,26 +9,26 @@ declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
|
||||
declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
|
||||
|
||||
; FUNC-LABEL: {{^}}ffloor_f64:
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
|
||||
; SI: S_BFE_U32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
|
||||
; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
|
||||
; SI: S_LSHR_B64
|
||||
; SI: S_NOT_B64
|
||||
; SI: S_AND_B64
|
||||
; SI-DAG: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
|
||||
; SI-DAG: CMP_LT_I32
|
||||
; SI: CNDMASK_B32
|
||||
; SI: CNDMASK_B32
|
||||
; SI: CMP_GT_I32
|
||||
; SI: CNDMASK_B32
|
||||
; SI: CNDMASK_B32
|
||||
; SI: CMP_LT_F64
|
||||
; SI: CNDMASK_B32
|
||||
; SI: CMP_NE_I32
|
||||
; SI: CNDMASK_B32
|
||||
; SI: CNDMASK_B32
|
||||
; SI: V_ADD_F64
|
||||
; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
|
||||
; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
|
||||
; SI: s_lshr_b64
|
||||
; SI: s_not_b64
|
||||
; SI: s_and_b64
|
||||
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
|
||||
; SI-DAG: cmp_lt_i32
|
||||
; SI: cndmask_b32
|
||||
; SI: cndmask_b32
|
||||
; SI: cmp_gt_i32
|
||||
; SI: cndmask_b32
|
||||
; SI: cndmask_b32
|
||||
; SI: cmp_lt_f64
|
||||
; SI: cndmask_b32
|
||||
; SI: cmp_ne_i32
|
||||
; SI: cndmask_b32
|
||||
; SI: cndmask_b32
|
||||
; SI: v_add_f64
|
||||
define void @ffloor_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = call double @llvm.floor.f64(double %x) nounwind readnone
|
||||
store double %y, double addrspace(1)* %out
|
||||
@@ -36,8 +36,8 @@ define void @ffloor_f64(double addrspace(1)* %out, double %x) {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}ffloor_v2f64:
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
|
||||
%y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
|
||||
store <2 x double> %y, <2 x double> addrspace(1)* %out
|
||||
@@ -45,9 +45,9 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
|
||||
}
|
||||
|
||||
; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64:
|
||||
; FIXME-CI: V_FLOOR_F64_e32
|
||||
; FIXME-CI: V_FLOOR_F64_e32
|
||||
; FIXME-CI: V_FLOOR_F64_e32
|
||||
; FIXME-CI: v_floor_f64_e32
|
||||
; FIXME-CI: v_floor_f64_e32
|
||||
; FIXME-CI: v_floor_f64_e32
|
||||
; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
|
||||
; %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
|
||||
; store <3 x double> %y, <3 x double> addrspace(1)* %out
|
||||
@@ -55,10 +55,10 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
|
||||
; }
|
||||
|
||||
; FUNC-LABEL: {{^}}ffloor_v4f64:
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
|
||||
%y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
|
||||
store <4 x double> %y, <4 x double> addrspace(1)* %out
|
||||
@@ -66,14 +66,14 @@ define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}ffloor_v8f64:
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
|
||||
%y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
|
||||
store <8 x double> %y, <8 x double> addrspace(1)* %out
|
||||
@@ -81,22 +81,22 @@ define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}ffloor_v16f64:
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: V_FLOOR_F64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
|
||||
%y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
|
||||
store <16 x double> %y, <16 x double> addrspace(1)* %out
|
||||
|
@@ -6,8 +6,8 @@
|
||||
|
||||
|
||||
; CHECK-LABEL: {{^}}branch_use_flat_i32:
|
||||
; CHECK: FLAT_STORE_DWORD {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH]
|
||||
; CHECK: S_ENDPGM
|
||||
; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH]
|
||||
; CHECK: s_endpgm
|
||||
define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
|
||||
entry:
|
||||
%cmp = icmp ne i32 %c, 0
|
||||
@@ -35,10 +35,10 @@ end:
|
||||
; remove generic pointers.
|
||||
|
||||
; CHECK-LABEL: {{^}}store_flat_i32:
|
||||
; CHECK: V_MOV_B32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}}
|
||||
; CHECK: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}}
|
||||
; CHECK: V_MOV_B32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}}
|
||||
; CHECK: FLAT_STORE_DWORD v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
; CHECK: v_mov_b32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}}
|
||||
; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}}
|
||||
; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}}
|
||||
; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
|
||||
define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
|
||||
%fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
|
||||
store i32 %x, i32 addrspace(4)* %fptr, align 4
|
||||
@@ -46,7 +46,7 @@ define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_flat_i64:
|
||||
; CHECK: FLAT_STORE_DWORDX2
|
||||
; CHECK: flat_store_dwordx2
|
||||
define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
|
||||
%fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
|
||||
store i64 %x, i64 addrspace(4)* %fptr, align 8
|
||||
@@ -54,7 +54,7 @@ define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_flat_v4i32:
|
||||
; CHECK: FLAT_STORE_DWORDX4
|
||||
; CHECK: flat_store_dwordx4
|
||||
define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
|
||||
%fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
|
||||
store <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16
|
||||
@@ -62,7 +62,7 @@ define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_flat_trunc_i16:
|
||||
; CHECK: FLAT_STORE_SHORT
|
||||
; CHECK: flat_store_short
|
||||
define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
|
||||
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
|
||||
%y = trunc i32 %x to i16
|
||||
@@ -71,7 +71,7 @@ define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_flat_trunc_i8:
|
||||
; CHECK: FLAT_STORE_BYTE
|
||||
; CHECK: flat_store_byte
|
||||
define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
|
||||
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
|
||||
%y = trunc i32 %x to i8
|
||||
@@ -82,7 +82,7 @@ define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
|
||||
|
||||
|
||||
; CHECK-LABEL @load_flat_i32:
|
||||
; CHECK: FLAT_LOAD_DWORD
|
||||
; CHECK: flat_load_dword
|
||||
define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
|
||||
%fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
|
||||
%fload = load i32 addrspace(4)* %fptr, align 4
|
||||
@@ -91,7 +91,7 @@ define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noa
|
||||
}
|
||||
|
||||
; CHECK-LABEL @load_flat_i64:
|
||||
; CHECK: FLAT_LOAD_DWORDX2
|
||||
; CHECK: flat_load_dwordx2
|
||||
define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
|
||||
%fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
|
||||
%fload = load i64 addrspace(4)* %fptr, align 4
|
||||
@@ -100,7 +100,7 @@ define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
|
||||
}
|
||||
|
||||
; CHECK-LABEL @load_flat_v4i32:
|
||||
; CHECK: FLAT_LOAD_DWORDX4
|
||||
; CHECK: flat_load_dwordx4
|
||||
define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
|
||||
%fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
|
||||
%fload = load <4 x i32> addrspace(4)* %fptr, align 4
|
||||
@@ -109,7 +109,7 @@ define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> add
|
||||
}
|
||||
|
||||
; CHECK-LABEL @sextload_flat_i8:
|
||||
; CHECK: FLAT_LOAD_SBYTE
|
||||
; CHECK: flat_load_sbyte
|
||||
define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
|
||||
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
|
||||
%fload = load i8 addrspace(4)* %fptr, align 4
|
||||
@@ -119,7 +119,7 @@ define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n
|
||||
}
|
||||
|
||||
; CHECK-LABEL @zextload_flat_i8:
|
||||
; CHECK: FLAT_LOAD_UBYTE
|
||||
; CHECK: flat_load_ubyte
|
||||
define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
|
||||
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
|
||||
%fload = load i8 addrspace(4)* %fptr, align 4
|
||||
@@ -129,7 +129,7 @@ define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n
|
||||
}
|
||||
|
||||
; CHECK-LABEL @sextload_flat_i16:
|
||||
; CHECK: FLAT_LOAD_SSHORT
|
||||
; CHECK: flat_load_sshort
|
||||
define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
|
||||
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
|
||||
%fload = load i16 addrspace(4)* %fptr, align 4
|
||||
@@ -139,7 +139,7 @@ define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)*
|
||||
}
|
||||
|
||||
; CHECK-LABEL @zextload_flat_i16:
|
||||
; CHECK: FLAT_LOAD_USHORT
|
||||
; CHECK: flat_load_ushort
|
||||
define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
|
||||
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
|
||||
%fload = load i16 addrspace(4)* %fptr, align 4
|
||||
@@ -155,12 +155,12 @@ define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)*
|
||||
|
||||
; Check for prologue initializing special SGPRs pointing to scratch.
|
||||
; CHECK-LABEL: {{^}}store_flat_scratch:
|
||||
; CHECK: S_MOVK_I32 flat_scratch_lo, 0
|
||||
; CHECK-NO-PROMOTE: S_MOVK_I32 flat_scratch_hi, 40
|
||||
; CHECK-PROMOTE: S_MOVK_I32 flat_scratch_hi, 0
|
||||
; CHECK: FLAT_STORE_DWORD
|
||||
; CHECK: S_BARRIER
|
||||
; CHECK: FLAT_LOAD_DWORD
|
||||
; CHECK: s_movk_i32 flat_scratch_lo, 0
|
||||
; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 40
|
||||
; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0
|
||||
; CHECK: flat_store_dword
|
||||
; CHECK: s_barrier
|
||||
; CHECK: flat_load_dword
|
||||
define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
|
||||
%alloca = alloca i32, i32 9, align 4
|
||||
%x = call i32 @llvm.r600.read.tidig.x() #3
|
||||
|
@@ -6,7 +6,7 @@ declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) n
|
||||
|
||||
|
||||
; FUNC-LABEL: {{^}}fma_f64:
|
||||
; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
|
||||
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
|
||||
define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
double addrspace(1)* %in2, double addrspace(1)* %in3) {
|
||||
%r0 = load double addrspace(1)* %in1
|
||||
@@ -18,8 +18,8 @@ define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}fma_v2f64:
|
||||
; SI: V_FMA_F64
|
||||
; SI: V_FMA_F64
|
||||
; SI: v_fma_f64
|
||||
; SI: v_fma_f64
|
||||
define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
|
||||
<2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
|
||||
%r0 = load <2 x double> addrspace(1)* %in1
|
||||
@@ -31,10 +31,10 @@ define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}fma_v4f64:
|
||||
; SI: V_FMA_F64
|
||||
; SI: V_FMA_F64
|
||||
; SI: V_FMA_F64
|
||||
; SI: V_FMA_F64
|
||||
; SI: v_fma_f64
|
||||
; SI: v_fma_f64
|
||||
; SI: v_fma_f64
|
||||
; SI: v_fma_f64
|
||||
define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
|
||||
<4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
|
||||
%r0 = load <4 x double> addrspace(1)* %in1
|
||||
|
@@ -6,7 +6,7 @@ declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounw
|
||||
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
; FUNC-LABEL: {{^}}fma_f32:
|
||||
; SI: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}},
|
||||
; EG: FMA {{\*? *}}[[RES]]
|
||||
@@ -21,8 +21,8 @@ define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}fma_v2f32:
|
||||
; SI: V_FMA_F32
|
||||
; SI: V_FMA_F32
|
||||
; SI: v_fma_f32
|
||||
; SI: v_fma_f32
|
||||
|
||||
; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]]].[[CHLO:[XYZW]]][[CHHI:[XYZW]]], {{T[0-9]\.[XYZW]}},
|
||||
; EG-DAG: FMA {{\*? *}}[[RES]].[[CHLO]]
|
||||
@@ -38,10 +38,10 @@ define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)*
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}fma_v4f32:
|
||||
; SI: V_FMA_F32
|
||||
; SI: V_FMA_F32
|
||||
; SI: V_FMA_F32
|
||||
; SI: V_FMA_F32
|
||||
; SI: v_fma_f32
|
||||
; SI: v_fma_f32
|
||||
; SI: v_fma_f32
|
||||
; SI: v_fma_f32
|
||||
|
||||
; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]]].{{[XYZW][XYZW][XYZW][XYZW]}}, {{T[0-9]\.[XYZW]}},
|
||||
; EG-DAG: FMA {{\*? *}}[[RES]].X
|
||||
|
@@ -7,7 +7,7 @@ declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>) #0
|
||||
declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>) #0
|
||||
|
||||
; FUNC-LABEL: @test_fmax_f64
|
||||
; SI: V_MAX_F64
|
||||
; SI: v_max_f64
|
||||
define void @test_fmax_f64(double addrspace(1)* %out, double %a, double %b) nounwind {
|
||||
%val = call double @llvm.maxnum.f64(double %a, double %b) #0
|
||||
store double %val, double addrspace(1)* %out, align 8
|
||||
@@ -15,8 +15,8 @@ define void @test_fmax_f64(double addrspace(1)* %out, double %a, double %b) noun
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmax_v2f64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
define void @test_fmax_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
|
||||
%val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b) #0
|
||||
store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16
|
||||
@@ -24,10 +24,10 @@ define void @test_fmax_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmax_v4f64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
define void @test_fmax_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
|
||||
%val = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %a, <4 x double> %b) #0
|
||||
store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32
|
||||
@@ -35,14 +35,14 @@ define void @test_fmax_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmax_v8f64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
define void @test_fmax_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
|
||||
%val = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %a, <8 x double> %b) #0
|
||||
store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64
|
||||
@@ -50,22 +50,22 @@ define void @test_fmax_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmax_v16f64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: V_MAX_F64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
; SI: v_max_f64
|
||||
define void @test_fmax_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
|
||||
%val = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %a, <16 x double> %b) #0
|
||||
store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128
|
||||
|
@@ -9,7 +9,7 @@ declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #0
|
||||
declare double @llvm.maxnum.f64(double, double)
|
||||
|
||||
; FUNC-LABEL: @test_fmax_f32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: v_max_f32_e32
|
||||
define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
|
||||
%val = call float @llvm.maxnum.f32(float %a, float %b) #0
|
||||
store float %val, float addrspace(1)* %out, align 4
|
||||
@@ -17,8 +17,8 @@ define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwin
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmax_v2f32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: v_max_f32_e32
|
||||
; SI: v_max_f32_e32
|
||||
define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
|
||||
%val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0
|
||||
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
|
||||
@@ -26,10 +26,10 @@ define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmax_v4f32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: v_max_f32_e32
|
||||
; SI: v_max_f32_e32
|
||||
; SI: v_max_f32_e32
|
||||
; SI: v_max_f32_e32
|
||||
define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
|
||||
%val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0
|
||||
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
|
||||
@@ -37,14 +37,14 @@ define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmax_v8f32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: V_MAX_F32_e32
|
||||
; SI: v_max_f32_e32
|
||||
; SI: v_max_f32_e32
|
||||
; SI: v_max_f32_e32
|
||||
; SI: v_max_f32_e32
|
||||
; SI: v_max_f32_e32
|
||||
; SI: v_max_f32_e32
|
||||
; SI: v_max_f32_e32
|
||||
; SI: v_max_f32_e32
|
||||
define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
|
||||
%val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0
|
||||
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
|
||||
@@ -52,22 +52,22 @@ define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @test_fmax_v16f32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
@@ -75,9 +75,9 @@ define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a,
}
; FUNC-LABEL: @constant_fold_fmax_f32
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 2.0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -85,9 +85,9 @@ define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmax_f32_nan_nan
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -95,9 +95,9 @@ define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmax_f32_val_nan
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -105,9 +105,9 @@ define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmax_f32_nan_val
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -115,9 +115,9 @@ define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmax_f32_p0_p0
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -125,9 +125,9 @@ define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmax_f32_p0_n0
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -135,9 +135,9 @@ define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmax_f32_n0_p0
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -145,9 +145,9 @@ define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmax_f32_n0_n0
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -155,7 +155,7 @@ define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @fmax_var_immediate_f32
; SI: V_MAX_F32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -163,7 +163,7 @@ define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind
}
; FUNC-LABEL: @fmax_immediate_var_f32
; SI: V_MAX_F32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float 2.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
@@ -171,8 +171,8 @@ define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind
}
; FUNC-LABEL: @fmax_var_literal_f32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: V_MAX_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float 99.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -180,8 +180,8 @@ define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
}
; FUNC-LABEL: @fmax_literal_var_f32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: V_MAX_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float 99.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
@@ -7,7 +7,7 @@ declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>) #0
declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>) #0
; FUNC-LABEL: @test_fmin_f64
; SI: V_MIN_F64
; SI: v_min_f64
define void @test_fmin_f64(double addrspace(1)* %out, double %a, double %b) nounwind {
%val = call double @llvm.minnum.f64(double %a, double %b) #0
store double %val, double addrspace(1)* %out, align 8
@@ -15,8 +15,8 @@ define void @test_fmin_f64(double addrspace(1)* %out, double %a, double %b) noun
}
; FUNC-LABEL: @test_fmin_v2f64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: v_min_f64
; SI: v_min_f64
define void @test_fmin_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%val = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) #0
store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16
@@ -24,10 +24,10 @@ define void @test_fmin_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <
}
; FUNC-LABEL: @test_fmin_v4f64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
define void @test_fmin_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%val = call <4 x double> @llvm.minnum.v4f64(<4 x double> %a, <4 x double> %b) #0
store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32
@@ -35,14 +35,14 @@ define void @test_fmin_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <
}
; FUNC-LABEL: @test_fmin_v8f64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
define void @test_fmin_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%val = call <8 x double> @llvm.minnum.v8f64(<8 x double> %a, <8 x double> %b) #0
store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64
@@ -50,22 +50,22 @@ define void @test_fmin_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <
}
; FUNC-LABEL: @test_fmin_v16f64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
define void @test_fmin_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%val = call <16 x double> @llvm.minnum.v16f64(<16 x double> %a, <16 x double> %b) #0
store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128
@@ -7,7 +7,7 @@ declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #0
declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #0
; FUNC-LABEL: @test_fmin_f32
; SI: V_MIN_F32_e32
; SI: v_min_f32_e32
define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
%val = call float @llvm.minnum.f32(float %a, float %b) #0
store float %val, float addrspace(1)* %out, align 4
@@ -15,8 +15,8 @@ define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwin
}
; FUNC-LABEL: @test_fmin_v2f32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b) #0
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
@@ -24,10 +24,10 @@ define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2
}
; FUNC-LABEL: @test_fmin_v4f32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) #0
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
@@ -35,14 +35,14 @@ define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4
}
; FUNC-LABEL: @test_fmin_v8f32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b) #0
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
@@ -50,22 +50,22 @@ define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8
}
; FUNC-LABEL: @test_fmin_v16f32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b) #0
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
@@ -73,9 +73,9 @@ define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a,
}
; FUNC-LABEL: @constant_fold_fmin_f32
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 1.0, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -83,9 +83,9 @@ define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmin_f32_nan_nan
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -93,9 +93,9 @@ define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmin_f32_val_nan
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -103,9 +103,9 @@ define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmin_f32_nan_val
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -113,9 +113,9 @@ define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmin_f32_p0_p0
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -123,9 +123,9 @@ define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmin_f32_p0_n0
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -133,9 +133,9 @@ define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmin_f32_n0_p0
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float -0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -143,9 +143,9 @@ define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmin_f32_n0_n0
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float -0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -153,7 +153,7 @@ define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @fmin_var_immediate_f32
; SI: V_MIN_F32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float %a, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -161,7 +161,7 @@ define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind
}
; FUNC-LABEL: @fmin_immediate_var_f32
; SI: V_MIN_F32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float 2.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
@@ -169,8 +169,8 @@ define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind
}
; FUNC-LABEL: @fmin_var_literal_f32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: V_MIN_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float %a, float 99.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -178,8 +178,8 @@ define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
}
; FUNC-LABEL: @fmin_literal_var_f32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: V_MIN_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float 99.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
@@ -5,7 +5,7 @@
; FUNC-LABEL: {{^}}fmul_f32:
; R600: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
; SI: V_MUL_F32
; SI: v_mul_f32
define void @fmul_f32(float addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fmul float %a, %b
@@ -21,8 +21,8 @@ declare void @llvm.AMDGPU.store.output(float, i32)
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
; SI: V_MUL_F32
; SI: V_MUL_F32
; SI: v_mul_f32
; SI: v_mul_f32
define void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
%0 = fmul <2 x float> %a, %b
@@ -36,10 +36,10 @@ entry:
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; SI: V_MUL_F32
; SI: V_MUL_F32
; SI: V_MUL_F32
; SI: V_MUL_F32
; SI: v_mul_f32
; SI: v_mul_f32
; SI: v_mul_f32
; SI: v_mul_f32
define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float> addrspace(1) * %in
@@ -50,9 +50,9 @@ define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)
}
; FUNC-LABEL: {{^}}test_mul_2_k:
; SI: V_MUL_F32
; SI-NOT: V_MUL_F32
; SI: S_ENDPGM
; SI: v_mul_f32
; SI-NOT: v_mul_f32
; SI: s_endpgm
define void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
%y = fmul float %x, 2.0
%z = fmul float %y, 3.0
@@ -61,10 +61,10 @@ define void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
}
; FUNC-LABEL: {{^}}test_mul_2_k_inv:
; SI: V_MUL_F32
; SI-NOT: V_MUL_F32
; SI-NOT: V_MAD_F32
; SI: S_ENDPGM
; SI: v_mul_f32
; SI-NOT: v_mul_f32
; SI-NOT: v_mad_f32
; SI: s_endpgm
define void @test_mul_2_k_inv(float addrspace(1)* %out, float %x) #0 {
%y = fmul float %x, 3.0
%z = fmul float %y, 2.0
@@ -1,7 +1,7 @@
; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
; FUNC-LABEL: {{^}}fmul_f64:
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
@@ -12,8 +12,8 @@ define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
}
; FUNC-LABEL: {{^}}fmul_v2f64:
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
<2 x double> addrspace(1)* %in2) {
%r0 = load <2 x double> addrspace(1)* %in1
@@ -24,10 +24,10 @@ define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(
}
; FUNC-LABEL: {{^}}fmul_v4f64:
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
<4 x double> addrspace(1)* %in2) {
%r0 = load <4 x double> addrspace(1)* %in1
@@ -6,7 +6,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone
; CHECK-LABEL: {{^}}fmuladd_f32:
; CHECK: V_MAD_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
; CHECK: v_mad_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2, float addrspace(1)* %in3) {
@@ -19,7 +19,7 @@ define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
}
; CHECK-LABEL: {{^}}fmuladd_f64:
; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; CHECK: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2, double addrspace(1)* %in3) {
@@ -32,10 +32,10 @@ define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
}
; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
@@ -51,10 +51,10 @@ define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %
}
; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
@@ -70,10 +70,10 @@ define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %
}
; CHECK-LABEL: {{^}}fadd_a_a_b_f32:
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fadd_a_a_b_f32(float addrspace(1)* %out,
float addrspace(1)* %in1,
float addrspace(1)* %in2) {
@@ -92,10 +92,10 @@ define void @fadd_a_a_b_f32(float addrspace(1)* %out,
}
; CHECK-LABEL: {{^}}fadd_b_a_a_f32:
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fadd_b_a_a_f32(float addrspace(1)* %out,
float addrspace(1)* %in1,
float addrspace(1)* %in2) {
@@ -114,10 +114,10 @@ define void @fadd_b_a_a_f32(float addrspace(1)* %out,
}
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
@@ -134,10 +134,10 @@ define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
@@ -156,10 +156,10 @@ define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspa
; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
@@ -178,10 +178,10 @@ define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1
; CHECK-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
@@ -4,9 +4,9 @@
; into 2 modifiers, although theoretically that should work.
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f64:
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff
; SI: V_AND_B32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]]
; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}}
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff
; SI: v_and_b32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]]
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}}
define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
@@ -26,7 +26,7 @@ define void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, double addrspace(1)
}
; FUNC-LABEL: {{^}}fneg_fabs_fmul_f64:
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|
define void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y) {
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
@@ -45,8 +45,8 @@ define void @fneg_fabs_free_f64(double addrspace(1)* %out, i64 %in) {
}
; FUNC-LABEL: {{^}}fneg_fabs_fn_free_f64:
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
%bc = bitcast i64 %in to double
%fabs = call double @fabs(double %bc)
@@ -56,12 +56,12 @@ define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
}
; FUNC-LABEL: {{^}}fneg_fabs_f64:
; SI: S_LOAD_DWORDX2
; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-DAG: V_OR_B32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]
; SI-DAG: V_MOV_B32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}
; SI: s_load_dwordx2
; SI: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]
; SI-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}
define void @fneg_fabs_f64(double addrspace(1)* %out, double %in) {
%fabs = call double @llvm.fabs.f64(double %in)
%fsub = fsub double -0.000000e+00, %fabs
@@ -70,10 +70,10 @@ define void @fneg_fabs_f64(double addrspace(1)* %out, double %in) {
}
; FUNC-LABEL: {{^}}fneg_fabs_v2f64:
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-NOT: 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
%fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
%fsub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %fabs
@@ -82,12 +82,12 @@ define void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in)
}
; FUNC-LABEL: {{^}}fneg_fabs_v4f64:
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-NOT: 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
%fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
%fsub = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %fabs
@@ -2,8 +2,8 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
; SI-NOT: AND
; SI: V_SUB_F32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
; SI-NOT: and
; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
define void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
%fabs = call float @llvm.fabs.f32(float %x)
%fsub = fsub float -0.000000e+00, %fabs
@@ -13,9 +13,9 @@ define void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
}
; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32:
; SI-NOT: AND
; SI: V_MUL_F32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
; SI-NOT: AND
; SI-NOT: and
; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
; SI-NOT: and
define void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
%fabs = call float @llvm.fabs.f32(float %x)
%fsub = fsub float -0.000000e+00, %fabs
@@ -33,8 +33,8 @@ define void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
; R600: |PV.{{[XYZW]}}|
; R600: -PV
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) {
%bc = bitcast i32 %in to float
%fabs = call float @llvm.fabs.f32(float %bc)
@@ -48,8 +48,8 @@ define void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) {
; R600: |PV.{{[XYZW]}}|
; R600: -PV
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) {
%bc = bitcast i32 %in to float
%fabs = call float @fabs(float %bc)
@@ -59,8 +59,8 @@ define void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) {
}
; FUNC-LABEL: {{^}}fneg_fabs_f32:
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_f32(float addrspace(1)* %out, float %in) {
%fabs = call float @llvm.fabs.f32(float %in)
%fsub = fsub float -0.000000e+00, %fabs
@@ -69,7 +69,7 @@ define void @fneg_fabs_f32(float addrspace(1)* %out, float %in) {
}
; FUNC-LABEL: {{^}}v_fneg_fabs_f32:
; SI: V_OR_B32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%val = load float addrspace(1)* %in, align 4
%fabs = call float @llvm.fabs.f32(float %val)
@@ -85,10 +85,10 @@ define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in)
; R600: -PV
; FIXME: SGPR should be used directly for first src operand.
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-NOT: 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
%fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
@@ -98,12 +98,12 @@ define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
; FIXME: SGPR should be used directly for first src operand.
; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-NOT: 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
%fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
@@ -1,7 +1,7 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_f64:
; SI: V_XOR_B32
; SI: v_xor_b32
define void @fneg_f64(double addrspace(1)* %out, double %in) {
%fneg = fsub double -0.000000e+00, %in
store double %fneg, double addrspace(1)* %out
@@ -9,8 +9,8 @@ define void @fneg_f64(double addrspace(1)* %out, double %in) {
}
; FUNC-LABEL: {{^}}fneg_v2f64:
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: v_xor_b32
; SI: v_xor_b32
define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
%fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
store <2 x double> %fneg, <2 x double> addrspace(1)* %out
@@ -23,10 +23,10 @@ define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double>
; R600: -PV
; R600: -PV
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: v_xor_b32
; SI: v_xor_b32
; SI: v_xor_b32
; SI: v_xor_b32
define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
%fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
store <4 x double> %fneg, <4 x double> addrspace(1)* %out
@@ -39,7 +39,7 @@ define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double>
; FUNC-LABEL: {{^}}fneg_free_f64:
; FIXME: Unnecessary copy to VGPRs
; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
%bc = bitcast i64 %in to double
%fsub = fsub double 0.0, %bc
@@ -48,9 +48,9 @@ define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
}
; SI-LABEL: {{^}}fneg_fold_f64:
; SI: S_LOAD_DWORDX2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: XOR
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
; SI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: xor
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
%fsub = fsub double -0.0, %in
%fmul = fmul double %fsub, %in
@@ -4,7 +4,7 @@
; FUNC-LABEL: {{^}}fneg_f32:
; R600: -PV
; SI: V_XOR_B32
; SI: v_xor_b32
define void @fneg_f32(float addrspace(1)* %out, float %in) {
%fneg = fsub float -0.000000e+00, %in
store float %fneg, float addrspace(1)* %out
@@ -15,8 +15,8 @@ define void @fneg_f32(float addrspace(1)* %out, float %in) {
; R600: -PV
; R600: -PV
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: v_xor_b32
; SI: v_xor_b32
define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
%fneg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
store <2 x float> %fneg, <2 x float> addrspace(1)* %out
@@ -29,10 +29,10 @@ define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %i
; R600: -PV
; R600: -PV
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: v_xor_b32
; SI: v_xor_b32
; SI: v_xor_b32
; SI: v_xor_b32
define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
%fneg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
store <4 x float> %fneg, <4 x float> addrspace(1)* %out
@@ -47,8 +47,8 @@ define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %i
; R600-NOT: XOR
; R600: -KC0[2].Z
; XXX: We could use V_ADD_F32_e64 with the negate bit here instead.
; SI: V_SUB_F32_e64 v{{[0-9]}}, 0.0, s{{[0-9]+$}}
; XXX: We could use v_add_f32_e64 with the negate bit here instead.
; SI: v_sub_f32_e64 v{{[0-9]}}, 0.0, s{{[0-9]+$}}
define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
%bc = bitcast i32 %in to float
%fsub = fsub float 0.0, %bc
@@ -57,9 +57,9 @@ define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
}
; FUNC-LABEL: {{^}}fneg_fold_f32:
; SI: S_LOAD_DWORD [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: XOR
; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: xor
; SI: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
%fsub = fsub float -0.0, %in
%fmul = fmul float %fsub, %in
@@ -4,9 +4,9 @@ declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
; SI-LABEL: {{^}}test_convert_fp16_to_fp32:
; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
; SI: V_CVT_F32_F16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: buffer_load_ushort [[VAL:v[0-9]+]]
; SI: v_cvt_f32_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]]
define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%val = load i16 addrspace(1)* %in, align 2
%cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
@@ -16,10 +16,10 @@ define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 add
; SI-LABEL: {{^}}test_convert_fp16_to_fp64:
; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
; SI: V_CVT_F32_F16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
; SI: V_CVT_F64_F32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
; SI: buffer_load_ushort [[VAL:v[0-9]+]]
; SI: v_cvt_f32_f16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
; SI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%val = load i16 addrspace(1)* %in, align 2
%cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
@@ -3,9 +3,9 @@
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
; SI-LABEL: {{^}}test_convert_fp32_to_fp16:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
; SI: V_CVT_F16_F32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: BUFFER_STORE_SHORT [[RESULT]]
; SI: buffer_load_dword [[VAL:v[0-9]+]]
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_short [[RESULT]]
define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%val = load float addrspace(1)* %in, align 4
%cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
@@ -4,7 +4,7 @@
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; FUNC-LABEL: @fp_to_sint_f64_i32
; SI: V_CVT_I32_F64_e32
; SI: v_cvt_i32_f64_e32
define void @fp_to_sint_f64_i32(i32 addrspace(1)* %out, double %in) {
%result = fptosi double %in to i32
store i32 %result, i32 addrspace(1)* %out
@@ -12,8 +12,8 @@ define void @fp_to_sint_f64_i32(i32 addrspace(1)* %out, double %in) {
}
; FUNC-LABEL: @fp_to_sint_v2f64_v2i32
; SI: V_CVT_I32_F64_e32
; SI: V_CVT_I32_F64_e32
; SI: v_cvt_i32_f64_e32
; SI: v_cvt_i32_f64_e32
define void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out, <2 x double> %in) {
%result = fptosi <2 x double> %in to <2 x i32>
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
@@ -21,10 +21,10 @@ define void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out, <2 x double> %
}
; FUNC-LABEL: @fp_to_sint_v4f64_v4i32
; SI: V_CVT_I32_F64_e32
; SI: V_CVT_I32_F64_e32
; SI: V_CVT_I32_F64_e32
; SI: V_CVT_I32_F64_e32
; SI: v_cvt_i32_f64_e32
; SI: v_cvt_i32_f64_e32
; SI: v_cvt_i32_f64_e32
; SI: v_cvt_i32_f64_e32
define void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> %in) {
%result = fptosi <4 x double> %in to <4 x i32>
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
@@ -32,20 +32,20 @@ define void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> %
}
; FUNC-LABEL: @fp_to_sint_i64_f64
; CI-DAG: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; CI-DAG: V_TRUNC_F64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
; CI-DAG: S_MOV_B32 s[[K0_LO:[0-9]+]], 0{{$}}
; CI-DAG: S_MOV_B32 s[[K0_HI:[0-9]+]], 0x3df00000
; CI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; CI-DAG: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}}
; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000
; CI-DAG: V_MUL_F64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
; CI-DAG: V_FLOOR_F64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
; CI-DAG: S_MOV_B32 s[[K1_HI:[0-9]+]], 0xc1f00000
; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000
; CI-DAG: V_FMA_F64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
; CI-DAG: V_CVT_U32_F64_e32 v[[LO:[0-9]+]], [[FMA]]
; CI-DAG: V_CVT_I32_F64_e32 v[[HI:[0-9]+]], [[FLOOR]]
; CI: BUFFER_STORE_DWORDX2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
; CI-DAG: v_cvt_i32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr double addrspace(1)* %in, i32 %tid
@@ -3,8 +3,8 @@

; FUNC-LABEL: {{^}}fp_to_sint_i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; SI: V_CVT_I32_F32_e32
; SI: S_ENDPGM
; SI: v_cvt_i32_f32_e32
; SI: s_endpgm
define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) {
%conv = fptosi float %in to i32
store i32 %conv, i32 addrspace(1)* %out
@@ -14,8 +14,8 @@ define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) {
; FUNC-LABEL: {{^}}fp_to_sint_v2i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; SI: V_CVT_I32_F32_e32
; SI: V_CVT_I32_F32_e32
; SI: v_cvt_i32_f32_e32
; SI: v_cvt_i32_f32_e32
define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
%result = fptosi <2 x float> %in to <2 x i32>
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
@@ -27,10 +27,10 @@ define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; SI: V_CVT_I32_F32_e32
; SI: V_CVT_I32_F32_e32
; SI: V_CVT_I32_F32_e32
; SI: V_CVT_I32_F32_e32
; SI: v_cvt_i32_f32_e32
; SI: v_cvt_i32_f32_e32
; SI: v_cvt_i32_f32_e32
; SI: v_cvt_i32_f32_e32
define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%value = load <4 x float> addrspace(1) * %in
%result = fptosi <4 x float> %value to <4 x i32>
@@ -63,7 +63,7 @@ define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac
; EG-DAG: CNDE_INT

; Check that the compiler doesn't crash with a "cannot select" error
; SI: S_ENDPGM
; SI: s_endpgm
define void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) {
entry:
%0 = fptosi float %in to i64
@@ -115,7 +115,7 @@ entry:
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT

; SI: S_ENDPGM
; SI: s_endpgm
define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
%conv = fptosi <2 x float> %x to <2 x i64>
store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
@@ -208,7 +208,7 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT

; SI: S_ENDPGM
; SI: s_endpgm
define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
%conv = fptosi <4 x float> %x to <4 x i64>
store <4 x i64> %conv, <4 x i64> addrspace(1)* %out

@@ -4,7 +4,7 @@
declare i32 @llvm.r600.read.tidig.x() nounwind readnone

; SI-LABEL: {{^}}fp_to_uint_i32_f64:
; SI: V_CVT_U32_F64_e32
; SI: v_cvt_u32_f64_e32
define void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) {
%cast = fptoui double %in to i32
store i32 %cast, i32 addrspace(1)* %out, align 4
@@ -12,8 +12,8 @@ define void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) {
}

; SI-LABEL: @fp_to_uint_v2i32_v2f64
; SI: V_CVT_U32_F64_e32
; SI: V_CVT_U32_F64_e32
; SI: v_cvt_u32_f64_e32
; SI: v_cvt_u32_f64_e32
define void @fp_to_uint_v2i32_v2f64(<2 x i32> addrspace(1)* %out, <2 x double> %in) {
%cast = fptoui <2 x double> %in to <2 x i32>
store <2 x i32> %cast, <2 x i32> addrspace(1)* %out, align 8
@@ -21,10 +21,10 @@ define void @fp_to_uint_v2i32_v2f64(<2 x i32> addrspace(1)* %out, <2 x double> %
}

; SI-LABEL: @fp_to_uint_v4i32_v4f64
; SI: V_CVT_U32_F64_e32
; SI: V_CVT_U32_F64_e32
; SI: V_CVT_U32_F64_e32
; SI: V_CVT_U32_F64_e32
; SI: v_cvt_u32_f64_e32
; SI: v_cvt_u32_f64_e32
; SI: v_cvt_u32_f64_e32
; SI: v_cvt_u32_f64_e32
define void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, <4 x double> %in) {
%cast = fptoui <4 x double> %in to <4 x i32>
store <4 x i32> %cast, <4 x i32> addrspace(1)* %out, align 8
@@ -32,20 +32,20 @@ define void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, <4 x double> %
}

; FUNC-LABEL: @fp_to_uint_i64_f64
; CI-DAG: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; CI-DAG: V_TRUNC_F64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
; CI-DAG: S_MOV_B32 s[[K0_LO:[0-9]+]], 0{{$}}
; CI-DAG: S_MOV_B32 s[[K0_HI:[0-9]+]], 0x3df00000
; CI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; CI-DAG: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}}
; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000

; CI-DAG: V_MUL_F64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
; CI-DAG: V_FLOOR_F64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]

; CI-DAG: S_MOV_B32 s[[K1_HI:[0-9]+]], 0xc1f00000
; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000

; CI-DAG: V_FMA_F64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
; CI-DAG: V_CVT_U32_F64_e32 v[[LO:[0-9]+]], [[FMA]]
; CI-DAG: V_CVT_U32_F64_e32 v[[HI:[0-9]+]], [[FLOOR]]
; CI: BUFFER_STORE_DWORDX2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
; CI-DAG: v_cvt_u32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr double addrspace(1)* %in, i32 %tid

@@ -3,8 +3,8 @@

; FUNC-LABEL: {{^}}fp_to_uint_i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; SI: V_CVT_U32_F32_e32
; SI: S_ENDPGM
; SI: v_cvt_u32_f32_e32
; SI: s_endpgm
define void @fp_to_uint_i32 (i32 addrspace(1)* %out, float %in) {
%conv = fptoui float %in to i32
store i32 %conv, i32 addrspace(1)* %out
@@ -14,8 +14,8 @@ define void @fp_to_uint_i32 (i32 addrspace(1)* %out, float %in) {
; FUNC-LABEL: {{^}}fp_to_uint_v2i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; SI: V_CVT_U32_F32_e32
; SI: V_CVT_U32_F32_e32
; SI: v_cvt_u32_f32_e32
; SI: v_cvt_u32_f32_e32

define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
%result = fptoui <2 x float> %in to <2 x i32>
@@ -28,10 +28,10 @@ define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; SI: V_CVT_U32_F32_e32
; SI: V_CVT_U32_F32_e32
; SI: V_CVT_U32_F32_e32
; SI: V_CVT_U32_F32_e32
; SI: v_cvt_u32_f32_e32
; SI: v_cvt_u32_f32_e32
; SI: v_cvt_u32_f32_e32
; SI: v_cvt_u32_f32_e32

define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%value = load <4 x float> addrspace(1) * %in
@@ -63,7 +63,7 @@ define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT

; SI: S_ENDPGM
; SI: s_endpgm
define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) {
%conv = fptoui float %x to i64
store i64 %conv, i64 addrspace(1)* %out
@@ -114,7 +114,7 @@ define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) {
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT

; SI: S_ENDPGM
; SI: s_endpgm
define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
%conv = fptoui <2 x float> %x to <2 x i64>
store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
@@ -207,7 +207,7 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT

; SI: S_ENDPGM
; SI: s_endpgm
define void @fp_to_uint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
%conv = fptoui <4 x float> %x to <4 x i64>
store <4 x i64> %conv, <4 x i64> addrspace(1)* %out

@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK

; CHECK: {{^}}fpext:
; CHECK: V_CVT_F64_F32_e32
; CHECK: v_cvt_f64_f32_e32
define void @fpext(double addrspace(1)* %out, float %in) {
%result = fpext float %in to double
store double %result, double addrspace(1)* %out

@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK

; CHECK: {{^}}fptrunc:
; CHECK: V_CVT_F32_F64_e32
; CHECK: v_cvt_f32_f64_e32
define void @fptrunc(float addrspace(1)* %out, double %in) {
%result = fptrunc double %in to float
store float %result, float addrspace(1)* %out

@@ -1,16 +1,16 @@
; RUN: llc -march=r600 -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; FUNC-LABEL: {{^}}frem_f32:
; SI-DAG: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{.*$}}
; SI-DAG: BUFFER_LOAD_DWORD [[Y:v[0-9]+]], {{.*}} offset:0x10
; SI-DAG: V_CMP
; SI-DAG: V_MUL_F32
; SI: V_RCP_F32_e32
; SI: V_MUL_F32_e32
; SI: V_MUL_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_MAD_F32
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:0x10
; SI-DAG: v_cmp
; SI-DAG: v_mul_f32
; SI: v_rcp_f32_e32
; SI: v_mul_f32_e32
; SI: v_mul_f32_e32
; SI: v_trunc_f32_e32
; SI: v_mad_f32
; SI: s_endpgm
define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2) #0 {
%gep2 = getelementptr float addrspace(1)* %in2, i32 4
@@ -22,14 +22,14 @@ define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
}

; FUNC-LABEL: {{^}}unsafe_frem_f32:
; SI: BUFFER_LOAD_DWORD [[Y:v[0-9]+]], {{.*}} offset:0x10
; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{.*}}
; SI: V_RCP_F32_e32 [[INVY:v[0-9]+]], [[Y]]
; SI: V_MUL_F32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
; SI: V_TRUNC_F32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
; SI: V_MAD_F32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:0x10
; SI: buffer_load_dword [[X:v[0-9]+]], {{.*}}
; SI: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
; SI: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2) #1 {
%gep2 = getelementptr float addrspace(1)* %in2, i32 4
@@ -44,7 +44,7 @@ define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
; correctly

; FUNC-LABEL: {{^}}frem_f64:
; SI: S_ENDPGM
; SI: s_endpgm
define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) #0 {
%r0 = load double addrspace(1)* %in1, align 8
@@ -55,11 +55,11 @@ define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
}

; FUNC-LABEL: {{^}}unsafe_frem_f64:
; SI: V_RCP_F64_e32
; SI: V_MUL_F64
; SI: V_BFE_U32
; SI: V_FMA_F64
; SI: S_ENDPGM
; SI: v_rcp_f64_e32
; SI: v_mul_f64
; SI: v_bfe_u32
; SI: v_fma_f64
; SI: s_endpgm
define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) #1 {
%r0 = load double addrspace(1)* %in1, align 8

@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s

; CHECK: {{^}}fsqrt_f32:
; CHECK: V_SQRT_F32_e32 {{v[0-9]+, v[0-9]+}}
; CHECK: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}}

define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%r0 = load float addrspace(1)* %in
@@ -11,7 +11,7 @@ define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
}

; CHECK: {{^}}fsqrt_f64:
; CHECK: V_SQRT_F64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; CHECK: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}

define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%r0 = load double addrspace(1)* %in

@@ -3,7 +3,7 @@


; FUNC-LABEL: {{^}}v_fsub_f32:
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%b_ptr = getelementptr float addrspace(1)* %in, i32 1
%a = load float addrspace(1)* %in, align 4
@@ -16,7 +16,7 @@ define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
; FUNC-LABEL: {{^}}s_fsub_f32:
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W

; SI: V_SUB_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define void @s_fsub_f32(float addrspace(1)* %out, float %a, float %b) {
%sub = fsub float %a, %b
store float %sub, float addrspace(1)* %out, align 4
@@ -32,8 +32,8 @@ declare void @llvm.AMDGPU.store.output(float, i32)
; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y

; FIXME: Should be using SGPR directly for first operand
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
%sub = fsub <2 x float> %a, %b
store <2 x float> %sub, <2 x float> addrspace(1)* %out, align 8
@@ -46,10 +46,10 @@ define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x flo
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}

; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float> addrspace(1)* %in, align 16
@@ -62,11 +62,11 @@ define void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(
; FIXME: Should be using SGPR directly for first operand

; FUNC-LABEL: {{^}}s_fsub_v4f32:
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: S_ENDPGM
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: s_endpgm
define void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
%result = fsub <4 x float> %a, %b
store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16

@@ -1,7 +1,7 @@
; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; SI-LABEL: {{^}}fsub_f64:
; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1

@@ -9,9 +9,9 @@ declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone
declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone

; FUNC-LABEL: {{^}}v_ftrunc_f64:
; CI: V_TRUNC_F64
; SI: V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11
; SI: S_ENDPGM
; CI: v_trunc_f64
; SI: v_bfe_u32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11
; SI: s_endpgm
define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%x = load double addrspace(1)* %in, align 8
%y = call double @llvm.trunc.f64(double %x) nounwind readnone
@@ -20,21 +20,21 @@ define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
}

; FUNC-LABEL: {{^}}ftrunc_f64:
; CI: V_TRUNC_F64_e32
; CI: v_trunc_f64_e32

; SI: S_BFE_U32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: S_LSHR_B64
; SI: S_NOT_B64
; SI: S_AND_B64
; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI: CMP_LT_I32
; SI: CNDMASK_B32
; SI: CNDMASK_B32
; SI: CMP_GT_I32
; SI: CNDMASK_B32
; SI: CNDMASK_B32
; SI: S_ENDPGM
; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: s_lshr_b64
; SI: s_not_b64
; SI: s_and_b64
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI: cmp_lt_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: cmp_gt_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: s_endpgm
define void @ftrunc_f64(double addrspace(1)* %out, double %x) {
%y = call double @llvm.trunc.f64(double %x) nounwind readnone
store double %y, double addrspace(1)* %out
@@ -42,8 +42,8 @@ define void @ftrunc_f64(double addrspace(1)* %out, double %x) {
}

; FUNC-LABEL: {{^}}ftrunc_v2f64:
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
%y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) nounwind readnone
store <2 x double> %y, <2 x double> addrspace(1)* %out
@@ -51,9 +51,9 @@ define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
}

; FIXME-FUNC-LABEL: {{^}}ftrunc_v3f64:
; FIXME-CI: V_TRUNC_F64_e32
; FIXME-CI: V_TRUNC_F64_e32
; FIXME-CI: V_TRUNC_F64_e32
; FIXME-CI: v_trunc_f64_e32
; FIXME-CI: v_trunc_f64_e32
; FIXME-CI: v_trunc_f64_e32
; define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
; %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) nounwind readnone
; store <3 x double> %y, <3 x double> addrspace(1)* %out
@@ -61,10 +61,10 @@ define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
; }

; FUNC-LABEL: {{^}}ftrunc_v4f64:
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
%y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
store <4 x double> %y, <4 x double> addrspace(1)* %out
@@ -72,14 +72,14 @@ define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
}

; FUNC-LABEL: {{^}}ftrunc_v8f64:
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
%y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) nounwind readnone
store <8 x double> %y, <8 x double> addrspace(1)* %out
@@ -87,22 +87,22 @@ define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
}

; FUNC-LABEL: {{^}}ftrunc_v16f64:
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
%y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) nounwind readnone
store <16 x double> %y, <16 x double> addrspace(1)* %out

@@ -10,7 +10,7 @@ declare <16 x float> @llvm.trunc.v16f32(<16 x float>) nounwind readnone

; FUNC-LABEL: {{^}}ftrunc_f32:
; EG: TRUNC
; SI: V_TRUNC_F32_e32
; SI: v_trunc_f32_e32
define void @ftrunc_f32(float addrspace(1)* %out, float %x) {
%y = call float @llvm.trunc.f32(float %x) nounwind readnone
store float %y, float addrspace(1)* %out
@@ -20,8 +20,8 @@ define void @ftrunc_f32(float addrspace(1)* %out, float %x) {
; FUNC-LABEL: {{^}}ftrunc_v2f32:
; EG: TRUNC
; EG: TRUNC
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
%y = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x) nounwind readnone
store <2 x float> %y, <2 x float> addrspace(1)* %out
@@ -32,9 +32,9 @@ define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
; FIXME-EG: TRUNC
; FIXME-EG: TRUNC
; FIXME-EG: TRUNC
; FIXME-SI: V_TRUNC_F32_e32
; FIXME-SI: V_TRUNC_F32_e32
; FIXME-SI: V_TRUNC_F32_e32
; FIXME-SI: v_trunc_f32_e32
; FIXME-SI: v_trunc_f32_e32
; FIXME-SI: v_trunc_f32_e32
; define void @ftrunc_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
; %y = call <3 x float> @llvm.trunc.v3f32(<3 x float> %x) nounwind readnone
; store <3 x float> %y, <3 x float> addrspace(1)* %out
@@ -46,10 +46,10 @@ define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
define void @ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
%y = call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
store <4 x float> %y, <4 x float> addrspace(1)* %out
@@ -65,14 +65,14 @@ define void @ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
%y = call <8 x float> @llvm.trunc.v8f32(<8 x float> %x) nounwind readnone
store <8 x float> %y, <8 x float> addrspace(1)* %out
@@ -96,22 +96,22 @@ define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
define void @ftrunc_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) {
%y = call <16 x float> @llvm.trunc.v16f32(<16 x float> %x) nounwind readnone
store <16 x float> %y, <16 x float> addrspace(1)* %out

@@ -3,8 +3,8 @@

define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: {{^}}use_gep_address_space:
; CHECK: V_MOV_B32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}} offset:64
; CHECK: v_mov_b32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: ds_write_b32 [[PTR]], v{{[0-9]+}} offset:64
%p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
store i32 99, i32 addrspace(3)* %p
ret void
@@ -14,9 +14,9 @@ define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %arra
; CHECK-LABEL: {{^}}use_gep_address_space_large_offset:
; The LDS offset will be 65536 bytes, which is larger than the size of LDS on
; SI, which is why it is being OR'd with the base pointer.
; SI: S_OR_B32
; CI: S_ADD_I32
; CHECK: DS_WRITE_B32
; SI: s_or_b32
; CI: s_add_i32
; CHECK: ds_write_b32
%p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16384
store i32 99, i32 addrspace(3)* %p
ret void
@@ -24,10 +24,10 @@ define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %arra

define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind {
; CHECK-LABEL: {{^}}gep_as_vector_v4:
; CHECK: S_ADD_I32
; CHECK: S_ADD_I32
; CHECK: S_ADD_I32
; CHECK: S_ADD_I32
; CHECK: s_add_i32
; CHECK: s_add_i32
; CHECK: s_add_i32
; CHECK: s_add_i32
%p = getelementptr <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
%p0 = extractelement <4 x i32 addrspace(3)*> %p, i32 0
%p1 = extractelement <4 x i32 addrspace(3)*> %p, i32 1
@@ -42,8 +42,8 @@ define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind

define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind {
; CHECK-LABEL: {{^}}gep_as_vector_v2:
; CHECK: S_ADD_I32
; CHECK: S_ADD_I32
; CHECK: s_add_i32
; CHECK: s_add_i32
%p = getelementptr <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> <i16 16, i16 16>
%p0 = extractelement <2 x i32 addrspace(3)*> %p, i32 0
%p1 = extractelement <2 x i32 addrspace(3)*> %p, i32 1

@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s

; FUNC-LABEL: {{^}}atomic_add_i32_offset:
; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -10,8 +10,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset:
; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -21,7 +21,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset:
; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -31,8 +31,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -43,7 +43,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_add_i32:
; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
@@ -51,8 +51,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_add_i32_ret:
; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
@@ -61,7 +61,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_add_i32_addr64:
; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -70,8 +70,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64:
; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -81,7 +81,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_and_i32_offset:
; SI: BUFFER_ATOMIC_AND v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -90,8 +90,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset:
; SI: BUFFER_ATOMIC_AND [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -101,7 +101,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset:
; SI: BUFFER_ATOMIC_AND v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -111,8 +111,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_AND [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -123,7 +123,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_and_i32:
; SI: BUFFER_ATOMIC_AND v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
@@ -131,8 +131,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_and_i32_ret:
; SI: BUFFER_ATOMIC_AND [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
@@ -141,7 +141,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_and_i32_addr64:
; SI: BUFFER_ATOMIC_AND v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -150,8 +150,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64:
; SI: BUFFER_ATOMIC_AND [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -161,7 +161,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_sub_i32_offset:
; SI: BUFFER_ATOMIC_SUB v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -170,8 +170,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset:
; SI: BUFFER_ATOMIC_SUB [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -181,7 +181,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; SI: BUFFER_ATOMIC_SUB v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -191,8 +191,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_SUB [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -203,7 +203,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_sub_i32:
; SI: BUFFER_ATOMIC_SUB v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
@@ -211,8 +211,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_sub_i32_ret:
; SI: BUFFER_ATOMIC_SUB [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
@@ -221,7 +221,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_sub_i32_addr64:
; SI: BUFFER_ATOMIC_SUB v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -230,8 +230,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; SI: BUFFER_ATOMIC_SUB [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -241,7 +241,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_max_i32_offset:
; SI: BUFFER_ATOMIC_SMAX v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -250,8 +250,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset:
; SI: BUFFER_ATOMIC_SMAX [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -261,7 +261,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset:
; SI: BUFFER_ATOMIC_SMAX v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -271,8 +271,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_SMAX [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -283,7 +283,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_max_i32:
; SI: BUFFER_ATOMIC_SMAX v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
@@ -291,8 +291,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_max_i32_ret:
; SI: BUFFER_ATOMIC_SMAX [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
@@ -301,7 +301,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_max_i32_addr64:
; SI: BUFFER_ATOMIC_SMAX v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -310,8 +310,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64:
; SI: BUFFER_ATOMIC_SMAX [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -321,7 +321,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umax_i32_offset:
; SI: BUFFER_ATOMIC_UMAX v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -330,8 +330,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset:
; SI: BUFFER_ATOMIC_UMAX [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -341,7 +341,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; SI: BUFFER_ATOMIC_UMAX v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -351,8 +351,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_UMAX [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -363,7 +363,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umax_i32:
; SI: BUFFER_ATOMIC_UMAX v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
@@ -371,8 +371,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umax_i32_ret:
; SI: BUFFER_ATOMIC_UMAX [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
@@ -381,7 +381,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umax_i32_addr64:
; SI: BUFFER_ATOMIC_UMAX v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -390,8 +390,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; SI: BUFFER_ATOMIC_UMAX [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -401,7 +401,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_min_i32_offset:
; SI: BUFFER_ATOMIC_SMIN v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -410,8 +410,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset:
; SI: BUFFER_ATOMIC_SMIN [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -421,7 +421,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset:
; SI: BUFFER_ATOMIC_SMIN v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -431,8 +431,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_SMIN [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -443,7 +443,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_min_i32:
; SI: BUFFER_ATOMIC_SMIN v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
@@ -451,8 +451,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_min_i32_ret:
; SI: BUFFER_ATOMIC_SMIN [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
@@ -461,7 +461,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_min_i32_addr64:
; SI: BUFFER_ATOMIC_SMIN v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -470,8 +470,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64:
; SI: BUFFER_ATOMIC_SMIN [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -481,7 +481,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umin_i32_offset:
; SI: BUFFER_ATOMIC_UMIN v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -490,8 +490,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset:
; SI: BUFFER_ATOMIC_UMIN [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -501,7 +501,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; SI: BUFFER_ATOMIC_UMIN v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -511,8 +511,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_UMIN [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -523,7 +523,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umin_i32:
; SI: BUFFER_ATOMIC_UMIN v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
@@ -531,8 +531,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umin_i32_ret:
; SI: BUFFER_ATOMIC_UMIN [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
@@ -541,7 +541,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umin_i32_addr64:
; SI: BUFFER_ATOMIC_UMIN v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -550,8 +550,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; SI: BUFFER_ATOMIC_UMIN [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
|
||||
; SI: BUFFER_STORE_DWORD [[RET]]
|
||||
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
|
||||
; SI: buffer_store_dword [[RET]]
|
||||
define void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
|
||||
@@ -561,7 +561,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_or_i32_offset:
; SI: BUFFER_ATOMIC_OR v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -570,8 +570,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset:
; SI: BUFFER_ATOMIC_OR [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -581,7 +581,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset:
; SI: BUFFER_ATOMIC_OR v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -591,8 +591,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_OR [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -603,7 +603,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_or_i32:
; SI: BUFFER_ATOMIC_OR v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
@@ -611,8 +611,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_or_i32_ret:
; SI: BUFFER_ATOMIC_OR [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
@@ -621,7 +621,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_or_i32_addr64:
; SI: BUFFER_ATOMIC_OR v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -630,8 +630,8 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64:
; SI: BUFFER_ATOMIC_OR [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -641,7 +641,7 @@ entry:
}

; FUNC-LABEL: {{^}}atomic_xchg_i32_offset:
|
||||
; SI: BUFFER_ATOMIC_SWAP v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
|
||||
; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
|
||||
define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32 addrspace(1)* %out, i32 4
|
||||
@@ -650,8 +650,8 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset:
|
||||
; SI: BUFFER_ATOMIC_SWAP [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
|
||||
; SI: BUFFER_STORE_DWORD [[RET]]
|
||||
; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
|
||||
; SI: buffer_store_dword [[RET]]
|
||||
define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32 addrspace(1)* %out, i32 4
|
||||
@@ -661,7 +661,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
|
||||
; SI: BUFFER_ATOMIC_SWAP v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
|
||||
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
|
||||
define void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
|
||||
@@ -671,8 +671,8 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
|
||||
; SI: BUFFER_ATOMIC_SWAP [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
|
||||
; SI: BUFFER_STORE_DWORD [[RET]]
|
||||
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
|
||||
; SI: buffer_store_dword [[RET]]
|
||||
define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
|
||||
@@ -683,7 +683,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xchg_i32:
|
||||
; SI: BUFFER_ATOMIC_SWAP v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
|
||||
; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
|
||||
define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
%0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
|
||||
@@ -691,8 +691,8 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret:
|
||||
; SI: BUFFER_ATOMIC_SWAP [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
|
||||
; SI: BUFFER_STORE_DWORD [[RET]]
|
||||
; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
|
||||
; SI: buffer_store_dword [[RET]]
|
||||
define void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
|
||||
entry:
|
||||
%0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
|
||||
@@ -701,7 +701,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64:
|
||||
; SI: BUFFER_ATOMIC_SWAP v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
|
||||
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
|
||||
define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
|
||||
@@ -710,8 +710,8 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
|
||||
; SI: BUFFER_ATOMIC_SWAP [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
|
||||
; SI: BUFFER_STORE_DWORD [[RET]]
|
||||
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
|
||||
; SI: buffer_store_dword [[RET]]
|
||||
define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
|
||||
@@ -721,7 +721,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
|
||||
; SI: BUFFER_ATOMIC_XOR v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
|
||||
; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
|
||||
define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32 addrspace(1)* %out, i32 4
|
||||
@@ -730,8 +730,8 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset:
|
||||
; SI: BUFFER_ATOMIC_XOR [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
|
||||
; SI: BUFFER_STORE_DWORD [[RET]]
|
||||
; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
|
||||
; SI: buffer_store_dword [[RET]]
|
||||
define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
|
||||
entry:
|
||||
%gep = getelementptr i32 addrspace(1)* %out, i32 4
|
||||
@@ -741,7 +741,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset:
|
||||
; SI: BUFFER_ATOMIC_XOR v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
|
||||
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
|
||||
define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
|
||||
@@ -751,8 +751,8 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
|
||||
; SI: BUFFER_ATOMIC_XOR [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
|
||||
; SI: BUFFER_STORE_DWORD [[RET]]
|
||||
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
|
||||
; SI: buffer_store_dword [[RET]]
|
||||
define void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
|
||||
@@ -763,7 +763,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xor_i32:
|
||||
; SI: BUFFER_ATOMIC_XOR v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
|
||||
; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
|
||||
define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
%0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
|
||||
@@ -771,8 +771,8 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xor_i32_ret:
|
||||
; SI: BUFFER_ATOMIC_XOR [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
|
||||
; SI: BUFFER_STORE_DWORD [[RET]]
|
||||
; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
|
||||
; SI: buffer_store_dword [[RET]]
|
||||
define void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
|
||||
entry:
|
||||
%0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
|
||||
@@ -781,7 +781,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64:
|
||||
; SI: BUFFER_ATOMIC_XOR v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
|
||||
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
|
||||
define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
|
||||
@@ -790,8 +790,8 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64:
|
||||
; SI: BUFFER_ATOMIC_XOR [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
|
||||
; SI: BUFFER_STORE_DWORD [[RET]]
|
||||
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
|
||||
; SI: buffer_store_dword [[RET]]
|
||||
define void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
|
||||
entry:
|
||||
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
|
||||
|
@@ -6,8 +6,8 @@
|
||||
|
||||
; FUNC-LABEL: {{^}}test_i8:
|
||||
; EG: CF_END
|
||||
; SI: BUFFER_STORE_BYTE
|
||||
; SI: S_ENDPGM
|
||||
; SI: buffer_store_byte
|
||||
; SI: s_endpgm
|
||||
define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
|
||||
%arrayidx = getelementptr inbounds [1 x i8] addrspace(2)* @a, i32 0, i32 %s
|
||||
%1 = load i8 addrspace(2)* %arrayidx, align 1
|
||||
@@ -19,8 +19,8 @@ define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
|
||||
|
||||
; FUNC-LABEL: {{^}}test_i16:
|
||||
; EG: CF_END
|
||||
; SI: BUFFER_STORE_SHORT
|
||||
; SI: S_ENDPGM
|
||||
; SI: buffer_store_short
|
||||
; SI: s_endpgm
|
||||
define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
|
||||
%arrayidx = getelementptr inbounds [1 x i16] addrspace(2)* @b, i32 0, i32 %s
|
||||
%1 = load i16 addrspace(2)* %arrayidx, align 2
|
||||
|
@@ -7,8 +7,8 @@
|
||||
@float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
|
||||
|
||||
; FUNC-LABEL: {{^}}float:
|
||||
; FIXME: We should be using S_LOAD_DWORD here.
|
||||
; SI: BUFFER_LOAD_DWORD
|
||||
; FIXME: We should be using s_load_dword here.
|
||||
; SI: buffer_load_dword
|
||||
|
||||
; EG-DAG: MOV {{\** *}}T2.X
|
||||
; EG-DAG: MOV {{\** *}}T3.X
|
||||
@@ -29,8 +29,8 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}i32:
|
||||
|
||||
; FIXME: We should be using S_LOAD_DWORD here.
|
||||
; SI: BUFFER_LOAD_DWORD
|
||||
; FIXME: We should be using s_load_dword here.
|
||||
; SI: buffer_load_dword
|
||||
|
||||
; EG-DAG: MOV {{\** *}}T2.X
|
||||
; EG-DAG: MOV {{\** *}}T3.X
|
||||
@@ -53,7 +53,7 @@ entry:
|
||||
@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
|
||||
|
||||
; FUNC-LABEL: {{^}}struct_foo_gv_load:
|
||||
; SI: S_LOAD_DWORD
|
||||
; SI: s_load_dword
|
||||
|
||||
define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
|
||||
%gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
|
||||
@@ -68,8 +68,8 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
|
||||
<1 x i32> <i32 4> ]
|
||||
|
||||
; FUNC-LABEL: {{^}}array_v1_gv_load:
|
||||
; FIXME: We should be using S_LOAD_DWORD here.
|
||||
; SI: BUFFER_LOAD_DWORD
|
||||
; FIXME: We should be using s_load_dword here.
|
||||
; SI: buffer_load_dword
|
||||
define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
|
||||
%gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
|
||||
%load = load <1 x i32> addrspace(2)* %gep, align 4
|
||||
|
@@ -2,8 +2,8 @@

define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_load_store:
; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
; CHECK: BUFFER_STORE_SHORT [[TMP]]
; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
; CHECK: buffer_store_short [[TMP]]
%val = load half addrspace(1)* %in
store half %val, half addrspace(1) * %out
ret void
@@ -11,8 +11,8 @@ define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {

define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_bitcast_from_half:
; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
; CHECK: BUFFER_STORE_SHORT [[TMP]]
; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
; CHECK: buffer_store_short [[TMP]]
%val = load half addrspace(1) * %in
%val_int = bitcast half %val to i16
store i16 %val_int, i16 addrspace(1)* %out
@@ -21,8 +21,8 @@ define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %o

define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) {
; CHECK-LABEL: {{^}}test_bitcast_to_half:
; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
; CHECK: BUFFER_STORE_SHORT [[TMP]]
; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
; CHECK: buffer_store_short [[TMP]]
%val = load i16 addrspace(1)* %in
%val_fp = bitcast i16 %val to half
store half %val_fp, half addrspace(1)* %out
@@ -31,7 +31,7 @@ define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in

define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_extend32:
; CHECK: V_CVT_F32_F16_e32
; CHECK: v_cvt_f32_f16_e32

%val16 = load half addrspace(1)* %in
%val32 = fpext half %val16 to float
@@ -41,8 +41,8 @@ define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {

define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_extend64:
; CHECK: V_CVT_F32_F16_e32
; CHECK: V_CVT_F64_F32_e32
; CHECK: v_cvt_f32_f16_e32
; CHECK: v_cvt_f64_f32_e32

%val16 = load half addrspace(1)* %in
%val64 = fpext half %val16 to double
@@ -52,7 +52,7 @@ define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {

define void @test_trunc32(float addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_trunc32:
; CHECK: V_CVT_F16_F32_e32
; CHECK: v_cvt_f16_f32_e32

%val32 = load float addrspace(1)* %in
%val16 = fptrunc float %val32 to half

@@ -1,7 +1,7 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; SI-LABEL: {{^}}test_i64_eq:
; SI: V_CMP_EQ_I64
; SI: v_cmp_eq_i64
define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp eq i64 %a, %b
%result = sext i1 %cmp to i32
@@ -10,7 +10,7 @@ define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}

; SI-LABEL: {{^}}test_i64_ne:
; SI: V_CMP_NE_I64
; SI: v_cmp_ne_i64
define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ne i64 %a, %b
%result = sext i1 %cmp to i32
@@ -19,7 +19,7 @@ define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}

; SI-LABEL: {{^}}test_i64_slt:
; SI: V_CMP_LT_I64
; SI: v_cmp_lt_i64
define void @test_i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp slt i64 %a, %b
%result = sext i1 %cmp to i32
@@ -28,7 +28,7 @@ define void @test_i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}

; SI-LABEL: {{^}}test_i64_ult:
; SI: V_CMP_LT_U64
; SI: v_cmp_lt_u64
define void @test_i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ult i64 %a, %b
%result = sext i1 %cmp to i32
@@ -37,7 +37,7 @@ define void @test_i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}

; SI-LABEL: {{^}}test_i64_sle:
; SI: V_CMP_LE_I64
; SI: v_cmp_le_i64
define void @test_i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp sle i64 %a, %b
%result = sext i1 %cmp to i32
@@ -46,7 +46,7 @@ define void @test_i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}

; SI-LABEL: {{^}}test_i64_ule:
; SI: V_CMP_LE_U64
; SI: v_cmp_le_u64
define void @test_i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ule i64 %a, %b
%result = sext i1 %cmp to i32
@@ -55,7 +55,7 @@ define void @test_i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}

; SI-LABEL: {{^}}test_i64_sgt:
; SI: V_CMP_GT_I64
; SI: v_cmp_gt_i64
define void @test_i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp sgt i64 %a, %b
%result = sext i1 %cmp to i32
@@ -64,7 +64,7 @@ define void @test_i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}

; SI-LABEL: {{^}}test_i64_ugt:
; SI: V_CMP_GT_U64
; SI: v_cmp_gt_u64
define void @test_i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ugt i64 %a, %b
%result = sext i1 %cmp to i32
@@ -73,7 +73,7 @@ define void @test_i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}

; SI-LABEL: {{^}}test_i64_sge:
; SI: V_CMP_GE_I64
; SI: v_cmp_ge_i64
define void @test_i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp sge i64 %a, %b
%result = sext i1 %cmp to i32
@@ -82,7 +82,7 @@ define void @test_i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}

; SI-LABEL: {{^}}test_i64_uge:
; SI: V_CMP_GE_U64
; SI: v_cmp_ge_u64
define void @test_i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp uge i64 %a, %b
%result = sext i1 %cmp to i32

@@ -2,9 +2,9 @@
|
||||
|
||||
; Use a 64-bit value with lo bits that can be represented as an inline constant
|
||||
; CHECK-LABEL: {{^}}i64_imm_inline_lo:
|
||||
; CHECK: S_MOV_B32 [[LO:s[0-9]+]], 5
|
||||
; CHECK: V_MOV_B32_e32 v[[LO_VGPR:[0-9]+]], [[LO]]
|
||||
; CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[LO_VGPR]]:
|
||||
; CHECK: s_mov_b32 [[LO:s[0-9]+]], 5
|
||||
; CHECK: v_mov_b32_e32 v[[LO_VGPR:[0-9]+]], [[LO]]
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VGPR]]:
|
||||
define void @i64_imm_inline_lo(i64 addrspace(1) *%out) {
|
||||
entry:
|
||||
store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005
|
||||
@@ -13,9 +13,9 @@ entry:
|
||||
|
||||
; Use a 64-bit value with hi bits that can be represented as an inline constant
|
||||
; CHECK-LABEL: {{^}}i64_imm_inline_hi:
|
||||
; CHECK: S_MOV_B32 [[HI:s[0-9]+]], 5
|
||||
; CHECK: V_MOV_B32_e32 v[[HI_VGPR:[0-9]+]], [[HI]]
|
||||
; CHECK: BUFFER_STORE_DWORDX2 v{{\[[0-9]+:}}[[HI_VGPR]]
|
||||
; CHECK: s_mov_b32 [[HI:s[0-9]+]], 5
|
||||
; CHECK: v_mov_b32_e32 v[[HI_VGPR:[0-9]+]], [[HI]]
|
||||
; CHECK: buffer_store_dwordx2 v{{\[[0-9]+:}}[[HI_VGPR]]
|
||||
define void @i64_imm_inline_hi(i64 addrspace(1) *%out) {
|
||||
entry:
|
||||
store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678
|
||||
@@ -23,89 +23,89 @@ entry:
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_0.0_f32
|
||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
|
||||
store float 0.0, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32
|
||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0.5{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0.5{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
|
||||
store float 0.5, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f32
|
||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -0.5{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -0.5{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
|
||||
store float -0.5, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_1.0_f32
|
||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
|
||||
store float 1.0, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f32
|
||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -1.0{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
|
||||
store float -1.0, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_2.0_f32
|
||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 2.0{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
|
||||
store float 2.0, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f32
|
||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -2.0{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -2.0{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
|
||||
store float -2.0, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_4.0_f32
|
||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 4.0{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 4.0{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
|
||||
store float 4.0, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f32
|
||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -4.0{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -4.0{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
|
||||
store float -4.0, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}store_literal_imm_f32:
|
||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x45800000
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x45800000
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @store_literal_imm_f32(float addrspace(1)* %out) {
|
||||
store float 4096.0, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32
|
||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
|
||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 0.0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
|
||||
%y = fadd float %x, 0.0
|
||||
store float %y, float addrspace(1)* %out
|
||||
@@ -113,9 +113,9 @@ define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32
|
||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
|
||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
|
||||
%y = fadd float %x, 0.5
|
||||
store float %y, float addrspace(1)* %out
|
||||
@@ -123,9 +123,9 @@ define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32
|
||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
|
||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
|
||||
%y = fadd float %x, -0.5
|
||||
store float %y, float addrspace(1)* %out
|
||||
@@ -133,9 +133,9 @@ define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32
|
||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
|
||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
|
||||
%y = fadd float %x, 1.0
|
||||
store float %y, float addrspace(1)* %out
|
||||
@@ -143,9 +143,9 @@ define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32
|
||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
|
||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
|
||||
%y = fadd float %x, -1.0
|
||||
store float %y, float addrspace(1)* %out
|
||||
@@ -153,9 +153,9 @@ define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32
|
||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
|
||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
|
||||
%y = fadd float %x, 2.0
|
||||
store float %y, float addrspace(1)* %out
|
||||
@@ -163,9 +163,9 @@ define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32
|
||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
|
||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
|
||||
%y = fadd float %x, -2.0
|
||||
store float %y, float addrspace(1)* %out
|
||||
@@ -173,9 +173,9 @@ define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32
|
||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
|
||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
|
||||
%y = fadd float %x, 4.0
|
||||
store float %y, float addrspace(1)* %out
|
||||
@@ -183,9 +183,9 @@ define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32
|
||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
|
||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: s_load_dword [[VAL:s[0-9]+]]
|
||||
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}}
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
|
||||
%y = fadd float %x, -4.0
|
||||
store float %y, float addrspace(1)* %out
|
||||
@@ -193,9 +193,9 @@ define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @commute_add_inline_imm_0.5_f32
|
||||
; CHECK: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
|
||||
; CHECK: V_ADD_F32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%x = load float addrspace(1)* %in
|
||||
%y = fadd float %x, 0.5
|
||||
@@ -204,9 +204,9 @@ define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addr
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @commute_add_literal_f32
|
||||
; CHECK: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
|
||||
; CHECK: V_ADD_F32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
|
||||
; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
|
||||
; CHECK-NEXT: buffer_store_dword [[REG]]
|
||||
define void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
|
||||
%x = load float addrspace(1)* %in
|
||||
%y = fadd float %x, 1024.0
|
||||
|
@@ -4,8 +4,8 @@
|
||||
; indexing of vectors.
|
||||
|
||||
; CHECK: extract_w_offset
|
||||
; CHECK: S_MOV_B32 m0
|
||||
; CHECK-NEXT: V_MOVRELS_B32_e32
|
||||
; CHECK: s_mov_b32 m0
|
||||
; CHECK-NEXT: v_movrels_b32_e32
|
||||
define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
%0 = add i32 %in, 1
|
||||
@@ -15,8 +15,8 @@ entry:
|
||||
}
|
||||
|
||||
; CHECK: extract_wo_offset
|
||||
; CHECK: S_MOV_B32 m0
|
||||
; CHECK-NEXT: V_MOVRELS_B32_e32
|
||||
; CHECK: s_mov_b32 m0
|
||||
; CHECK-NEXT: v_movrels_b32_e32
|
||||
define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
%0 = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
|
||||
@@ -25,8 +25,8 @@ entry:
|
||||
}
|
||||
|
||||
; CHECK: insert_w_offset
|
||||
; CHECK: S_MOV_B32 m0
|
||||
; CHECK-NEXT: V_MOVRELD_B32_e32
|
||||
; CHECK: s_mov_b32 m0
|
||||
; CHECK-NEXT: v_movreld_b32_e32
|
||||
define void @insert_w_offset(float addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
%0 = add i32 %in, 1
|
||||
@@ -37,8 +37,8 @@ entry:
|
||||
}
|
||||
|
||||
; CHECK: insert_wo_offset
|
||||
; CHECK: S_MOV_B32 m0
|
||||
; CHECK-NEXT: V_MOVRELD_B32_e32
|
||||
; CHECK: s_mov_b32 m0
|
||||
; CHECK-NEXT: v_movreld_b32_e32
|
||||
define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
%0 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
|
||||
|
@@ -6,11 +6,11 @@ declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
|
||||
|
||||
; SI-LABEL: {{^}}private_access_f64_alloca:
|
||||
|
||||
; SI-ALLOCA: BUFFER_STORE_DWORDX2
|
||||
; SI-ALLOCA: BUFFER_LOAD_DWORDX2
|
||||
; SI-ALLOCA: buffer_store_dwordx2
|
||||
; SI-ALLOCA: buffer_load_dwordx2
|
||||
|
||||
; SI-PROMOTE: DS_WRITE_B64
|
||||
; SI-PROMOTE: DS_READ_B64
|
||||
; SI-PROMOTE: ds_write_b64
|
||||
; SI-PROMOTE: ds_read_b64
|
||||
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
|
||||
%val = load double addrspace(1)* %in, align 8
|
||||
%array = alloca double, i32 16, align 8
|
||||
@@ -24,17 +24,17 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
|
||||
|
||||
; SI-LABEL: {{^}}private_access_v2f64_alloca:
|
||||
|
||||
; SI-ALLOCA: BUFFER_STORE_DWORDX4
|
||||
; SI-ALLOCA: BUFFER_LOAD_DWORDX4
|
||||
; SI-ALLOCA: buffer_store_dwordx4
|
||||
; SI-ALLOCA: buffer_load_dwordx4
|
||||
|
||||
; SI-PROMOTE: DS_WRITE_B32
|
||||
; SI-PROMOTE: DS_WRITE_B32
|
||||
; SI-PROMOTE: DS_WRITE_B32
|
||||
; SI-PROMOTE: DS_WRITE_B32
|
||||
; SI-PROMOTE: DS_READ_B32
|
||||
; SI-PROMOTE: DS_READ_B32
|
||||
; SI-PROMOTE: DS_READ_B32
|
||||
; SI-PROMOTE: DS_READ_B32
|
||||
; SI-PROMOTE: ds_write_b32
|
||||
; SI-PROMOTE: ds_write_b32
|
||||
; SI-PROMOTE: ds_write_b32
|
||||
; SI-PROMOTE: ds_write_b32
|
||||
; SI-PROMOTE: ds_read_b32
|
||||
; SI-PROMOTE: ds_read_b32
|
||||
; SI-PROMOTE: ds_read_b32
|
||||
; SI-PROMOTE: ds_read_b32
|
||||
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
|
||||
%val = load <2 x double> addrspace(1)* %in, align 16
|
||||
%array = alloca <2 x double>, i32 16, align 16
|
||||
@@ -48,11 +48,11 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out
|
||||
|
||||
; SI-LABEL: {{^}}private_access_i64_alloca:
|
||||
|
||||
; SI-ALLOCA: BUFFER_STORE_DWORDX2
|
||||
; SI-ALLOCA: BUFFER_LOAD_DWORDX2
|
||||
; SI-ALLOCA: buffer_store_dwordx2
|
||||
; SI-ALLOCA: buffer_load_dwordx2
|
||||
|
||||
; SI-PROMOTE: DS_WRITE_B64
|
||||
; SI-PROMOTE: DS_READ_B64
|
||||
; SI-PROMOTE: ds_write_b64
|
||||
; SI-PROMOTE: ds_read_b64
|
||||
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
|
||||
%val = load i64 addrspace(1)* %in, align 8
|
||||
%array = alloca i64, i32 16, align 8
|
||||
@@ -66,17 +66,17 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
|
||||
|
||||
; SI-LABEL: {{^}}private_access_v2i64_alloca:
|
||||
|
||||
; SI-ALLOCA: BUFFER_STORE_DWORDX4
|
||||
; SI-ALLOCA: BUFFER_LOAD_DWORDX4
|
||||
; SI-ALLOCA: buffer_store_dwordx4
|
||||
; SI-ALLOCA: buffer_load_dwordx4
|
||||
|
||||
; SI-PROMOTE: DS_WRITE_B32
|
||||
; SI-PROMOTE: DS_WRITE_B32
|
||||
; SI-PROMOTE: DS_WRITE_B32
|
||||
; SI-PROMOTE: DS_WRITE_B32
|
||||
; SI-PROMOTE: DS_READ_B32
|
||||
; SI-PROMOTE: DS_READ_B32
|
||||
; SI-PROMOTE: DS_READ_B32
|
||||
; SI-PROMOTE: DS_READ_B32
|
||||
; SI-PROMOTE: ds_write_b32
|
||||
; SI-PROMOTE: ds_write_b32
|
||||
; SI-PROMOTE: ds_write_b32
|
||||
; SI-PROMOTE: ds_write_b32
|
||||
; SI-PROMOTE: ds_read_b32
|
||||
; SI-PROMOTE: ds_read_b32
|
||||
; SI-PROMOTE: ds_read_b32
|
||||
; SI-PROMOTE: ds_read_b32
|
||||
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
|
||||
%val = load <2 x i64> addrspace(1)* %in, align 16
|
||||
%array = alloca <2 x i64>, i32 16, align 16
|
||||
|
@@ -1,11 +1,11 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; SI-LABEL: {{^}}infinite_loop:
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x3e7
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
; SI: BB0_1:
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: S_WAITCNT vmcnt(0) expcnt(0)
; SI: S_BRANCH BB0_1
; SI: buffer_store_dword [[REG]]
; SI: s_waitcnt vmcnt(0) expcnt(0)
; SI: s_branch BB0_1
define void @infinite_loop(i32 addrspace(1)* %out) {
entry:
br label %for.body

@@ -9,11 +9,11 @@
|
||||
; not just directly into the vector component?
|
||||
|
||||
; SI-LABEL: {{^}}insertelement_v4f32_0:
|
||||
; S_LOAD_DWORDX4 s{{[}}[[LOW_REG:[0-9]+]]:
|
||||
; V_MOV_B32_e32
|
||||
; V_MOV_B32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
|
||||
; V_MOV_B32_e32 v[[LOW_REG]], [[CONSTREG]]
|
||||
; BUFFER_STORE_DWORDX4 v{{[}}[[LOW_REG]]:
|
||||
; s_load_dwordx4 s{{[}}[[LOW_REG:[0-9]+]]:
|
||||
; v_mov_b32_e32
|
||||
; v_mov_b32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
|
||||
; v_mov_b32_e32 v[[LOW_REG]], [[CONSTREG]]
|
||||
; buffer_store_dwordx4 v{{[}}[[LOW_REG]]:
|
||||
define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
|
||||
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
|
||||
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
|
||||
@@ -49,9 +49,9 @@ define void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) n
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v2f32:
|
||||
; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 0x40a00000
|
||||
; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
|
||||
; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[LOW_RESULT_REG]]:
|
||||
; SI: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
|
||||
; SI: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
|
||||
; SI: buffer_store_dwordx2 {{v\[}}[[LOW_RESULT_REG]]:
|
||||
define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 %b
|
||||
store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 8
|
||||
@@ -59,9 +59,9 @@ define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x fl
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v4f32:
|
||||
; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 0x40a00000
|
||||
; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
|
||||
; SI: BUFFER_STORE_DWORDX4 {{v\[}}[[LOW_RESULT_REG]]:
|
||||
; SI: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
|
||||
; SI: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
|
||||
; SI: buffer_store_dwordx4 {{v\[}}[[LOW_RESULT_REG]]:
|
||||
define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %b
|
||||
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
|
||||
@@ -69,8 +69,8 @@ define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x fl
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v8f32:
|
||||
; FIXMESI: BUFFER_STORE_DWORDX4
|
||||
; FIXMESI: BUFFER_STORE_DWORDX4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
|
||||
store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
|
||||
@@ -78,10 +78,10 @@ define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x fl
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v16f32:
|
||||
; FIXMESI: BUFFER_STORE_DWORDX4
|
||||
; FIXMESI: BUFFER_STORE_DWORDX4
|
||||
; FIXMESI: BUFFER_STORE_DWORDX4
|
||||
; FIXMESI: BUFFER_STORE_DWORDX4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
|
||||
store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
|
||||
@@ -89,7 +89,7 @@ define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v2i32:
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: buffer_store_dwordx2
|
||||
define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <2 x i32> %a, i32 5, i32 %b
|
||||
store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 8
|
||||
@@ -97,7 +97,7 @@ define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32>
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v4i32:
|
||||
; SI: BUFFER_STORE_DWORDX4
|
||||
; SI: buffer_store_dwordx4
|
||||
define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <4 x i32> %a, i32 5, i32 %b
|
||||
store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
|
||||
@@ -105,8 +105,8 @@ define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32>
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v8i32:
|
||||
; FIXMESI: BUFFER_STORE_DWORDX4
|
||||
; FIXMESI: BUFFER_STORE_DWORDX4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <8 x i32> %a, i32 5, i32 %b
|
||||
store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
|
||||
@@ -114,10 +114,10 @@ define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32>
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v16i32:
|
||||
; FIXMESI: BUFFER_STORE_DWORDX4
|
||||
; FIXMESI: BUFFER_STORE_DWORDX4
|
||||
; FIXMESI: BUFFER_STORE_DWORDX4
|
||||
; FIXMESI: BUFFER_STORE_DWORDX4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <16 x i32> %a, i32 5, i32 %b
|
||||
store <16 x i32> %vecins, <16 x i32> addrspace(1)* %out, align 64
|
||||
@@ -126,7 +126,7 @@ define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i
|
||||
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v2i16:
|
||||
; FIXMESI: BUFFER_STORE_DWORDX2
|
||||
; FIXMESI: buffer_store_dwordx2
|
||||
define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <2 x i16> %a, i16 5, i32 %b
|
||||
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out, align 8
|
||||
@@ -134,7 +134,7 @@ define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v4i16:
|
||||
; FIXMESI: BUFFER_STORE_DWORDX4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <4 x i16> %a, i16 5, i32 %b
|
||||
store <4 x i16> %vecins, <4 x i16> addrspace(1)* %out, align 16
|
||||
@@ -151,7 +151,7 @@ define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v4i8:
|
||||
; FIXMESI: BUFFER_STORE_DWORD
|
||||
; FIXMESI: buffer_store_dword
|
||||
define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <4 x i8> %a, i8 5, i32 %b
|
||||
store <4 x i8> %vecins, <4 x i8> addrspace(1)* %out, align 16
|
||||
@@ -159,7 +159,7 @@ define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v8i8:
|
||||
; FIXMESI: BUFFER_STORE_DWORDX2
|
||||
; FIXMESI: buffer_store_dwordx2
|
||||
define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <8 x i8> %a, i8 5, i32 %b
|
||||
store <8 x i8> %vecins, <8 x i8> addrspace(1)* %out, align 16
|
||||
@@ -167,7 +167,7 @@ define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v16i8:
|
||||
; FIXMESI: BUFFER_STORE_DWORDX4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <16 x i8> %a, i8 5, i32 %b
|
||||
store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16
|
||||
@@ -201,11 +201,11 @@ endif:
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v2f64:
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: S_ENDPGM
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: s_endpgm
|
||||
define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <2 x double> %a, double 8.0, i32 %b
|
||||
store <2 x double> %vecins, <2 x double> addrspace(1)* %out, align 16
|
||||
@@ -213,9 +213,9 @@ define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x d
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v2i64:
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: S_ENDPGM
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: s_endpgm
|
||||
define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <2 x i64> %a, i64 5, i32 %b
|
||||
store <2 x i64> %vecins, <2 x i64> addrspace(1)* %out, align 8
|
||||
@@ -223,11 +223,11 @@ define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64>
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v4f64:
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: S_ENDPGM
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: s_endpgm
|
||||
define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <4 x double> %a, double 8.0, i32 %b
|
||||
store <4 x double> %vecins, <4 x double> addrspace(1)* %out, align 16
|
||||
@@ -235,15 +235,15 @@ define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x d
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v8f64:
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: S_ENDPGM
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: s_endpgm
|
||||
define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <8 x double> %a, double 8.0, i32 %b
|
||||
store <8 x double> %vecins, <8 x double> addrspace(1)* %out, align 16
|
||||
|
@@ -5,7 +5,7 @@
; EG-CHECK-LABEL: {{^}}i8_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i8_arg:
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: buffer_load_ubyte

define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
@@ -17,7 +17,7 @@ entry:
; EG-CHECK-LABEL: {{^}}i8_zext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i8_zext_arg:
; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb

define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
@@ -29,7 +29,7 @@ entry:
; EG-CHECK-LABEL: {{^}}i8_sext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i8_sext_arg:
; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb

define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
@@ -41,7 +41,7 @@ entry:
; EG-CHECK-LABEL: {{^}}i16_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i16_arg:
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: buffer_load_ushort

define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
@@ -53,7 +53,7 @@ entry:
; EG-CHECK-LABEL: {{^}}i16_zext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i16_zext_arg:
; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb

define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
@@ -65,7 +65,7 @@ entry:
; EG-CHECK-LABEL: {{^}}i16_sext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i16_sext_arg:
; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb

define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
@@ -77,7 +77,7 @@ entry:
; EG-CHECK-LABEL: {{^}}i32_arg:
; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i32_arg:
; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
; s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
store i32 %in, i32 addrspace(1)* %out, align 4
@@ -87,7 +87,7 @@ entry:
; EG-CHECK-LABEL: {{^}}f32_arg:
; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}f32_arg:
; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
; s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
store float %in, float addrspace(1)* %out, align 4
@@ -98,8 +98,8 @@ entry:
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; SI-CHECK-LABEL: {{^}}v2i8_arg:
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
store <2 x i8> %in, <2 x i8> addrspace(1)* %out
@@ -110,8 +110,8 @@ entry:
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; SI-CHECK-LABEL: {{^}}v2i16_arg:
; SI-CHECK-DAG: BUFFER_LOAD_USHORT
; SI-CHECK-DAG: BUFFER_LOAD_USHORT
; SI-CHECK-DAG: buffer_load_ushort
; SI-CHECK-DAG: buffer_load_ushort
define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
store <2 x i16> %in, <2 x i16> addrspace(1)* %out
@@ -122,7 +122,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI-CHECK-LABEL: {{^}}v2i32_arg:
; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; SI-CHECK: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
@@ -133,7 +133,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI-CHECK-LABEL: {{^}}v2f32_arg:
; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; SI-CHECK: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
@@ -166,7 +166,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI-CHECK-LABEL: {{^}}v3i32_arg:
; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
@@ -178,7 +178,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI-CHECK-LABEL: {{^}}v3f32_arg:
; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
@@ -191,10 +191,10 @@ entry:
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; SI-CHECK-LABEL: {{^}}v4i8_arg:
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
store <4 x i8> %in, <4 x i8> addrspace(1)* %out
@@ -207,10 +207,10 @@ entry:
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; SI-CHECK-LABEL: {{^}}v4i16_arg:
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
store <4 x i16> %in, <4 x i16> addrspace(1)* %out
@@ -223,7 +223,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI-CHECK-LABEL: {{^}}v4i32_arg:
; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
@@ -236,7 +236,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI-CHECK-LABEL: {{^}}v4f32_arg:
; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
@@ -253,13 +253,13 @@ entry:
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; SI-CHECK-LABEL: {{^}}v8i8_arg:
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
store <8 x i8> %in, <8 x i8> addrspace(1)* %out
@@ -276,14 +276,14 @@ entry:
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; SI-CHECK-LABEL: {{^}}v8i16_arg:
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
store <8 x i16> %in, <8 x i16> addrspace(1)* %out
@@ -300,7 +300,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI-CHECK-LABEL: {{^}}v8i32_arg:
; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
; SI-CHECK: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
@@ -317,7 +317,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI-CHECK-LABEL: {{^}}v8f32_arg:
; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
; SI-CHECK: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
@@ -342,22 +342,22 @@ entry:
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; SI-CHECK-LABEL: {{^}}v16i8_arg:
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
entry:
store <16 x i8> %in, <16 x i8> addrspace(1)* %out
@@ -382,22 +382,22 @@ entry:
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; SI-CHECK-LABEL: {{^}}v16i16_arg:
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
store <16 x i16> %in, <16 x i16> addrspace(1)* %out
@@ -422,7 +422,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI-CHECK-LABEL: {{^}}v16i32_arg:
; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
; SI-CHECK: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
@@ -447,7 +447,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI-CHECK-LABEL: {{^}}v16f32_arg:
; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
; SI-CHECK: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
@@ -455,18 +455,18 @@ entry:
}

; FUNC-LABEL: {{^}}kernel_arg_i64:
; SI: S_LOAD_DWORDX2
; SI: S_LOAD_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI: buffer_store_dwordx2
define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
store i64 %a, i64 addrspace(1)* %out, align 8
ret void
}

; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
; XSI: S_LOAD_DWORDX2
; XSI: S_LOAD_DWORDX2
; XSI: BUFFER_STORE_DWORDX2
; XSI: s_load_dwordx2
; XSI: s_load_dwordx2
; XSI: buffer_store_dwordx2
; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
; ret void

@@ -1,5 +1,5 @@
; RUN: llc -march=r600 -mcpu=SI < %s
; CHECK: S_ENDPGM
; CHECK: s_endpgm

@gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4

@@ -7,9 +7,9 @@ declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone
declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone

; FUNC-LABEL: {{^}}s_abs_i32:
; SI: S_SUB_I32
; SI: S_MAX_I32
; SI: S_ENDPGM
; SI: s_sub_i32
; SI: s_max_i32
; SI: s_endpgm

; EG: SUB_INT
; EG: MAX_INT
@@ -20,9 +20,9 @@ define void @s_abs_i32(i32 addrspace(1)* %out, i32 %src) nounwind {
}

; FUNC-LABEL: {{^}}v_abs_i32:
; SI: V_SUB_I32_e32
; SI: V_MAX_I32_e32
; SI: S_ENDPGM
; SI: v_sub_i32_e32
; SI: v_max_i32_e32
; SI: s_endpgm

; EG: SUB_INT
; EG: MAX_INT
@@ -34,9 +34,9 @@ define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind
}

; FUNC-LABEL: {{^}}abs_i32_legacy_amdil:
; SI: V_SUB_I32_e32
; SI: V_MAX_I32_e32
; SI: S_ENDPGM
; SI: v_sub_i32_e32
; SI: v_max_i32_e32
; SI: s_endpgm

; EG: SUB_INT
; EG: MAX_INT

@@ -3,7 +3,7 @@

; FUNC-LABEL: {{^}}test_barrier_global:
; EG: GROUP_BARRIER
; SI: S_BARRIER
; SI: s_barrier

define void @test_barrier_global(i32 addrspace(1)* %out) {
entry:

@@ -3,7 +3,7 @@

; FUNC-LABEL: {{^}}test_barrier_local:
; EG: GROUP_BARRIER
; SI: S_BARRIER
; SI: s_barrier

define void @test_barrier_local(i32 addrspace(1)* %out) {
entry:

@@ -4,7 +4,7 @@
declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone

; FUNC-LABEL: {{^}}bfe_i32_arg_arg_arg:
; SI: V_BFE_I32
; SI: v_bfe_i32
; EG: BFE_INT
; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
@@ -14,7 +14,7 @@ define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i
}

; FUNC-LABEL: {{^}}bfe_i32_arg_arg_imm:
; SI: V_BFE_I32
; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
@@ -23,7 +23,7 @@ define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) n
}

; FUNC-LABEL: {{^}}bfe_i32_arg_imm_arg:
; SI: V_BFE_I32
; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
@@ -32,7 +32,7 @@ define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) n
}

; FUNC-LABEL: {{^}}bfe_i32_imm_arg_arg:
; SI: V_BFE_I32
; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
@@ -41,7 +41,7 @@ define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n
}

; FUNC-LABEL: {{^}}v_bfe_print_arg:
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
%load = load i32 addrspace(1)* %src0, align 4
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
@@ -50,8 +50,8 @@ define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) no
}

; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_reg_offset:
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
@@ -60,8 +60,8 @@ define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i
}

; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_imm_offset:
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
@@ -70,9 +70,9 @@ define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i
}

; FUNC-LABEL: {{^}}bfe_i32_test_6:
; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: S_ENDPGM
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -82,11 +82,11 @@ define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}

; FUNC-LABEL: {{^}}bfe_i32_test_7:
; SI-NOT: SHL
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: shl
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -97,9 +97,9 @@ define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw

; FIXME: The shifts should be 1 BFE
; FUNC-LABEL: {{^}}bfe_i32_test_8:
; SI: BUFFER_LOAD_DWORD
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -109,10 +109,10 @@ define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}

; FUNC-LABEL: {{^}}bfe_i32_test_9:
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
@@ -121,10 +121,10 @@ define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}

; FUNC-LABEL: {{^}}bfe_i32_test_10:
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
@@ -133,10 +133,10 @@ define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}

; FUNC-LABEL: {{^}}bfe_i32_test_11:
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
@@ -145,10 +145,10 @@ define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}

; FUNC-LABEL: {{^}}bfe_i32_test_12:
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
@@ -157,9 +157,9 @@ define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}

; FUNC-LABEL: {{^}}bfe_i32_test_13:
; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = ashr i32 %x, 31
@@ -168,9 +168,9 @@ define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}

; FUNC-LABEL: {{^}}bfe_i32_test_14:
; SI-NOT: LSHR
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: lshr
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = lshr i32 %x, 31
@@ -179,10 +179,10 @@ define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_0:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
@@ -191,10 +191,10 @@ define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_1:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
@@ -203,10 +203,10 @@ define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_2:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
@@ -215,10 +215,10 @@ define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_3:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
@@ -227,10 +227,10 @@ define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_4:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
@@ -239,10 +239,10 @@ define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_5:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
@@ -251,10 +251,10 @@ define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_6:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0xffffff80
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0xffffff80
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
@@ -263,10 +263,10 @@ define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_7:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
@@ -275,10 +275,10 @@ define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_8:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
@@ -287,10 +287,10 @@ define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_9:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
@@ -299,10 +299,10 @@ define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_10:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
@@ -311,10 +311,10 @@ define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_11:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -6
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -6
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
@@ -323,10 +323,10 @@ define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_12:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
@@ -335,10 +335,10 @@ define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_13:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
@@ -347,10 +347,10 @@ define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_14:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
@@ -359,10 +359,10 @@ define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_15:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
@@ -371,10 +371,10 @@ define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_16:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
@@ -383,10 +383,10 @@ define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_17:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
@@ -395,10 +395,10 @@ define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_18:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
@@ -409,13 +409,13 @@ define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
; XXX - This should really be a single BFE, but the sext_inreg of the
; extended type i24 is never custom lowered.
; FUNC-LABEL: {{^}}bfe_sext_in_reg_i24:
; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
; SI: V_LSHLREV_B32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
; XSI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
; SI: buffer_load_dword [[LOAD:v[0-9]+]],
; SI: v_lshlrev_b32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
; XSI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
; XSI-NOT: SHL
; XSI-NOT: SHR
; XSI: BUFFER_STORE_DWORD [[BFE]],
; XSI: buffer_store_dword [[BFE]],
define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
@@ -426,12 +426,12 @@ define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
}

; FUNC-LABEL: @simplify_demanded_bfe_sdiv
; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]]
; SI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
; SI: V_LSHRREV_B32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
; SI: V_ADD_I32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
; SI: V_ASHRREV_I32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; SI: BUFFER_STORE_DWORD [[TMP2]]
; SI: buffer_load_dword [[LOAD:v[0-9]+]]
; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; SI: buffer_store_dword [[TMP2]]
define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%src = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone

@@ -4,7 +4,7 @@
declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone

; FUNC-LABEL: {{^}}bfe_u32_arg_arg_arg:
; SI: V_BFE_U32
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
@@ -13,7 +13,7 @@ define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i
}

; FUNC-LABEL: {{^}}bfe_u32_arg_arg_imm:
; SI: V_BFE_U32
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 123) nounwind readnone
@@ -22,7 +22,7 @@ define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) n
}

; FUNC-LABEL: {{^}}bfe_u32_arg_imm_arg:
; SI: V_BFE_U32
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 123, i32 %src2) nounwind readnone
@@ -31,7 +31,7 @@ define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) n
}

; FUNC-LABEL: {{^}}bfe_u32_imm_arg_arg:
; SI: V_BFE_U32
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 123, i32 %src1, i32 %src2) nounwind readnone
@@ -40,8 +40,8 @@ define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n
}

; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone
@@ -50,8 +50,8 @@ define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i
}

; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone
@@ -60,9 +60,9 @@ define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i
}

; FUNC-LABEL: {{^}}bfe_u32_zextload_i8:
; SI: BUFFER_LOAD_UBYTE
; SI-NOT: BFE
; SI: S_ENDPGM
; SI: buffer_load_ubyte
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%load = load i8 addrspace(1)* %in
%ext = zext i8 %load to i32
@@ -72,11 +72,11 @@ define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) n
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
; SI: BUFFER_LOAD_DWORD
; SI: V_ADD_I32
; SI-NEXT: V_AND_B32_e32
; SI-NOT: BFE
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -87,11 +87,11 @@ define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %i
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
; SI: BUFFER_LOAD_DWORD
; SI: V_ADD_I32
; SI-NEXT: V_AND_B32_e32
; SI-NOT: BFE
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -102,10 +102,10 @@ define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
; SI: BUFFER_LOAD_DWORD
; SI: V_ADD_I32
; SI: BFE
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_add_i32
; SI: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -116,11 +116,11 @@ define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspa
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
; SI: BUFFER_LOAD_DWORD
; SI: V_ADD_I32
; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0xf8
; SI-NEXT: BFE
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -131,11 +131,11 @@ define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspa
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
; SI: BUFFER_LOAD_DWORD
; SI: V_ADD_I32
; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0x80
; SI-NEXT: BFE
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -146,10 +146,10 @@ define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspa
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
; SI: BUFFER_LOAD_DWORD
; SI: V_ADD_I32
; SI-NEXT: BFE
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -160,9 +160,9 @@ define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrsp
}

; FUNC-LABEL: {{^}}bfe_u32_test_1:
; SI: BUFFER_LOAD_DWORD
; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI: s_endpgm
; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
@@ -188,12 +188,12 @@ define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}

; FUNC-LABEL: {{^}}bfe_u32_test_4:
; SI-NOT: LSHL
; SI-NOT: SHR
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: lshl
; SI-NOT: shr
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -204,11 +204,11 @@ define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}

; FUNC-LABEL: {{^}}bfe_u32_test_5:
; SI: BUFFER_LOAD_DWORD
; SI-NOT: LSHL
; SI-NOT: SHR
; SI: V_BFE_I32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI-NOT: lshl
; SI-NOT: shr
; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -219,9 +219,9 @@ define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}

; FUNC-LABEL: {{^}}bfe_u32_test_6:
; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: S_ENDPGM
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -231,9 +231,9 @@ define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}

; FUNC-LABEL: {{^}}bfe_u32_test_7:
; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -243,10 +243,10 @@ define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}

; FUNC-LABEL: {{^}}bfe_u32_test_8:
; SI-NOT: BFE
; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -256,10 +256,10 @@ define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}

; FUNC-LABEL: {{^}}bfe_u32_test_9:
; SI-NOT: BFE
; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
@@ -268,10 +268,10 @@ define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}

; FUNC-LABEL: {{^}}bfe_u32_test_10:
; SI-NOT: BFE
; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
@@ -280,10 +280,10 @@ define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}

; FUNC-LABEL: {{^}}bfe_u32_test_11:
; SI-NOT: BFE
; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
@@ -292,10 +292,10 @@ define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}

; FUNC-LABEL: {{^}}bfe_u32_test_12:
; SI-NOT: BFE
; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
@@ -305,8 +305,8 @@ define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun

; FUNC-LABEL: {{^}}bfe_u32_test_13:
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = ashr i32 %x, 31
@@ -315,9 +315,9 @@ define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}

; FUNC-LABEL: {{^}}bfe_u32_test_14:
; SI-NOT: LSHR
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: lshr
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = lshr i32 %x, 31
@@ -326,10 +326,10 @@ define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_0:
|
||||
; SI-NOT: BFE
|
||||
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
|
||||
; SI: BUFFER_STORE_DWORD [[VREG]],
|
||||
; SI: S_ENDPGM
|
||||
; SI-NOT: {{[^@]}}bfe
|
||||
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
|
||||
; SI: buffer_store_dword [[VREG]],
|
||||
; SI: s_endpgm
|
||||
; EG-NOT: BFE
|
||||
define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
|
||||
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone
|
||||
@@ -338,10 +338,10 @@ define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_1:
|
||||
; SI-NOT: BFE
|
||||
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
|
||||
; SI: BUFFER_STORE_DWORD [[VREG]],
|
||||
; SI: S_ENDPGM
|
||||
; SI-NOT: {{[^@]}}bfe
|
||||
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
|
||||
; SI: buffer_store_dword [[VREG]],
|
||||
; SI: s_endpgm
|
||||
; EG-NOT: BFE
|
||||
define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
|
||||
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone
|
||||
@@ -350,10 +350,10 @@ define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_2:
|
||||
; SI-NOT: BFE
|
||||
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
|
||||
; SI: BUFFER_STORE_DWORD [[VREG]],
|
||||
; SI: S_ENDPGM
|
||||
; SI-NOT: {{[^@]}}bfe
|
||||
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
|
||||
; SI: buffer_store_dword [[VREG]],
|
||||
; SI: s_endpgm
|
||||
; EG-NOT: BFE
|
||||
define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
|
||||
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone
|
||||
@@ -362,10 +362,10 @@ define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_3:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone
@@ -374,10 +374,10 @@ define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_4:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone
@@ -386,10 +386,10 @@ define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_5:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone
@@ -398,10 +398,10 @@ define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_6:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x80
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone
@@ -410,10 +410,10 @@ define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_7:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone
@@ -422,10 +422,10 @@ define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_8:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone
@@ -434,10 +434,10 @@ define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_9:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
@@ -446,10 +446,10 @@ define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_10:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone
@@ -458,10 +458,10 @@ define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_11:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone
@@ -470,10 +470,10 @@ define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_12:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone
@@ -482,10 +482,10 @@ define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_13:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone
@@ -494,10 +494,10 @@ define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_14:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone
@@ -506,10 +506,10 @@ define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_15:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone
@@ -518,10 +518,10 @@ define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_16:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone
@@ -530,10 +530,10 @@ define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_17:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
@@ -542,10 +542,10 @@ define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_18:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
@@ -558,12 +558,12 @@ define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {

; XXX: The operand to v_bfe_u32 could also just directly be the load register.
; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
; SI: BUFFER_LOAD_DWORD [[ARG:v[0-9]+]]
; SI: V_AND_B32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
; SI: V_BFE_U32 [[BFE:v[0-9]+]], [[AND]], 2, 2
; SI-DAG: BUFFER_STORE_DWORD [[AND]]
; SI-DAG: BUFFER_STORE_DWORD [[BFE]]
; SI: S_ENDPGM
; SI: buffer_load_dword [[ARG:v[0-9]+]]
; SI: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
; SI: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
; SI-DAG: buffer_store_dword [[AND]]
; SI-DAG: buffer_store_dword [[BFE]]
; SI: s_endpgm
define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
i32 addrspace(1)* %out1,
i32 addrspace(1)* %in) nounwind {

@@ -4,7 +4,7 @@
declare i32 @llvm.AMDGPU.bfi(i32, i32, i32) nounwind readnone

; FUNC-LABEL: {{^}}bfi_arg_arg_arg:
; SI: V_BFI_B32
; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
@@ -13,7 +13,7 @@ define void @bfi_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %
}

; FUNC-LABEL: {{^}}bfi_arg_arg_imm:
; SI: V_BFI_B32
; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 %src1, i32 123) nounwind readnone
@@ -22,7 +22,7 @@ define void @bfi_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounw
}

; FUNC-LABEL: {{^}}bfi_arg_imm_arg:
; SI: V_BFI_B32
; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 123, i32 %src2) nounwind readnone
@@ -31,7 +31,7 @@ define void @bfi_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounw
}

; FUNC-LABEL: {{^}}bfi_imm_arg_arg:
; SI: V_BFI_B32
; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 123, i32 %src1, i32 %src2) nounwind readnone

@@ -4,7 +4,7 @@
declare i32 @llvm.AMDGPU.bfm(i32, i32) nounwind readnone

; FUNC-LABEL: {{^}}bfm_arg_arg:
; SI: V_BFM
; SI: v_bfm
; EG: BFM_INT
define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 %src1) nounwind readnone
@@ -13,7 +13,7 @@ define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind
}

; FUNC-LABEL: {{^}}bfm_arg_imm:
; SI: V_BFM
; SI: v_bfm
; EG: BFM_INT
define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 123) nounwind readnone
@@ -22,7 +22,7 @@ define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
}

; FUNC-LABEL: {{^}}bfm_imm_arg:
; SI: V_BFM
; SI: v_bfm
; EG: BFM_INT
define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 %src1) nounwind readnone
@@ -31,7 +31,7 @@ define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
}

; FUNC-LABEL: {{^}}bfm_imm_imm:
; SI: V_BFM
; SI: v_bfm
; EG: BFM_INT
define void @bfm_imm_imm(i32 addrspace(1)* %out) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 456) nounwind readnone

@@ -3,11 +3,11 @@
declare i32 @llvm.AMDGPU.brev(i32) nounwind readnone

; FUNC-LABEL: {{^}}s_brev_i32:
; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
; SI: S_BREV_B32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
; SI: S_ENDPGM
; SI: s_load_dword [[VAL:s[0-9]+]],
; SI: s_brev_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
%ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
store i32 %ctlz, i32 addrspace(1)* %out, align 4
@@ -15,10 +15,10 @@ define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
}

; FUNC-LABEL: {{^}}v_brev_i32:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_BFREV_B32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32 addrspace(1)* %valptr, align 4
%ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone

@@ -5,10 +5,10 @@ declare float @llvm.AMDGPU.clamp.f32(float, float, float) nounwind readnone
declare float @llvm.AMDIL.clamp.f32(float, float, float) nounwind readnone

; FUNC-LABEL: {{^}}clamp_0_1_f32:
; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: s_load_dword [[ARG:s[0-9]+]],
; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm

; EG: MOV_SAT
define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
@@ -18,9 +18,9 @@ define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
}

; FUNC-LABEL: {{^}}clamp_0_1_amdil_legacy_f32:
; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: s_load_dword [[ARG:s[0-9]+]],
; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
; SI: buffer_store_dword [[RESULT]]
define void @clamp_0_1_amdil_legacy_f32(float addrspace(1)* %out, float %src) nounwind {
%clamp = call float @llvm.AMDIL.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone
store float %clamp, float addrspace(1)* %out, align 4

@@ -6,7 +6,7 @@ declare float @llvm.AMDGPU.cvt.f32.ubyte2(i32) nounwind readnone
declare float @llvm.AMDGPU.cvt.f32.ubyte3(i32) nounwind readnone

; SI-LABEL: {{^}}test_unpack_byte0_to_float:
; SI: V_CVT_F32_UBYTE0
; SI: v_cvt_f32_ubyte0
define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte0(i32 %val) nounwind readnone
@@ -15,7 +15,7 @@ define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(
}

; SI-LABEL: {{^}}test_unpack_byte1_to_float:
; SI: V_CVT_F32_UBYTE1
; SI: v_cvt_f32_ubyte1
define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte1(i32 %val) nounwind readnone
@@ -24,7 +24,7 @@ define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(
}

; SI-LABEL: {{^}}test_unpack_byte2_to_float:
; SI: V_CVT_F32_UBYTE2
; SI: v_cvt_f32_ubyte2
define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte2(i32 %val) nounwind readnone
@@ -33,7 +33,7 @@ define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(
}

; SI-LABEL: {{^}}test_unpack_byte3_to_float:
; SI: V_CVT_F32_UBYTE3
; SI: v_cvt_f32_ubyte3
define void @test_unpack_byte3_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte3(i32 %val) nounwind readnone

@@ -4,14 +4,14 @@ declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone
declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone

; SI-LABEL: {{^}}test_div_fixup_f32:
; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]]
; SI-DAG: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]]
; SI: V_DIV_FIXUP_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
; SI: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
%result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
@@ -19,7 +19,7 @@ define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, fl
}

; SI-LABEL: {{^}}test_div_fixup_f64:
; SI: V_DIV_FIXUP_F64
; SI: v_div_fixup_f64
define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
%result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
Some files were not shown because too many files have changed in this diff