R600/SI: Change all instruction assembly names to lowercase.

This matches the format produced by the AMD proprietary driver.

//==================================================================//
// Shell script for converting .ll test cases: (Pass the .ll files
   you want to convert to this script as arguments).
//==================================================================//

; This was necessary on my system so that A-Z in sed would match only
; upper case.  I'm not sure why.
export LC_ALL='C'

TEST_FILES="$*"

MATCHES=`grep -v Patterns SIInstructions.td | grep -o '"[A-Z0-9_]\+["e]' | grep -o '[A-Z0-9_]\+' | sort -r`

for f in $TEST_FILES; do
  # Check that there are SI tests:
  grep -q -e 'verde' -e 'bonaire' -e 'SI' -e 'tahiti' $f
  if [ $? -eq 0 ]; then
    for match in $MATCHES; do
      sed -i -e "s/\([ :]$match\)/\L\1/" $f
    done

    # Try to get check lines with partial instruction names
    sed -i 's/\(;[ ]*SI[A-Z\\-]*: \)\([A-Z_0-9]\+\)/\1\L\2/' $f
  fi
done

sed -i -e 's/bb0_1/BB0_1/g' ../../../test/CodeGen/R600/infinite-loop.ll
sed -i -e 's/SI-NOT: bfe/SI-NOT: {{[^@]}}bfe/g'../../../test/CodeGen/R600/llvm.AMDGPU.bfe.*32.ll ../../../test/CodeGen/R600/sext-in-reg.ll
sed -i -e 's/exp_IEEE/EXP_IEEE/g' ../../../test/CodeGen/R600/llvm.exp2.ll
sed -i -e 's/numVgprs/NumVgprs/g' ../../../test/CodeGen/R600/register-count-comments.ll
sed -i 's/\(; CHECK[-NOT]*: \)\([A-Z_0-9]\+\)/\1\L\2/' ../../../test/CodeGen/R600/select64.ll ../../../test/CodeGen/R600/sgpr-copy.ll

//==================================================================//
// Shell script for converting .td files (run this last)
//==================================================================//

export LC_ALL='C'
sed -i -e '/Patterns/!s/\("[A-Z0-9_]\+[ "e]\)/\L\1/g' SIInstructions.td
sed -i -e 's/"EXP/"exp/g' SIInstrInfo.td

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221350 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard
2014-11-05 14:50:53 +00:00
parent 1f06060994
commit 8eaed0f63d
238 changed files with 5725 additions and 5725 deletions

View File

@@ -298,7 +298,7 @@ class EXPCommon : InstSI<
(outs),
(ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
"EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
"exp $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
[] > {
let EXP_CNT = 1;
@@ -308,10 +308,10 @@ class EXPCommon : InstSI<
multiclass EXP_m {
let isPseudo = 1 in {
def "" : EXPCommon, SIMCInstr <"EXP", SISubtarget.NONE> ;
def "" : EXPCommon, SIMCInstr <"exp", SISubtarget.NONE> ;
}
def _si : EXPCommon, SIMCInstr <"EXP", SISubtarget.SI>, EXPe;
def _si : EXPCommon, SIMCInstr <"exp", SISubtarget.SI>, EXPe;
}
//===----------------------------------------------------------------------===//

File diff suppressed because it is too large Load Diff

View File

@@ -7,7 +7,7 @@
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
; SI-CHECK: {{^}}v4i32_kernel_arg:
; SI-CHECK: BUFFER_STORE_DWORDX4
; SI-CHECK: buffer_store_dwordx4
define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out
@@ -20,7 +20,7 @@ entry:
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
; SI-CHECK: {{^}}v4f32_kernel_arg:
; SI-CHECK: BUFFER_STORE_DWORDX4
; SI-CHECK: buffer_store_dwordx4
define void @v4f32_kernel_arg(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
store <4 x float> %in, <4 x float> addrspace(1)* %out

View File

@@ -11,8 +11,8 @@
; instructions with B64, U64, and I64 take 64-bit operands.
; FUNC-LABEL: {{^}}local_address_load:
; CHECK: V_MOV_B32_e{{32|64}} [[PTR:v[0-9]]]
; CHECK: DS_READ_B32 v{{[0-9]+}}, [[PTR]]
; CHECK: v_mov_b32_e{{32|64}} [[PTR:v[0-9]]]
; CHECK: ds_read_b32 v{{[0-9]+}}, [[PTR]]
define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = load i32 addrspace(3)* %in
@@ -21,9 +21,9 @@ entry:
}
; FUNC-LABEL: {{^}}local_address_gep:
; CHECK: S_ADD_I32 [[SPTR:s[0-9]]]
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; CHECK: DS_READ_B32 [[VPTR]]
; CHECK: s_add_i32 [[SPTR:s[0-9]]]
; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; CHECK: ds_read_b32 [[VPTR]]
define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
entry:
%0 = getelementptr i32 addrspace(3)* %in, i32 %offset
@@ -33,8 +33,8 @@ entry:
}
; FUNC-LABEL: {{^}}local_address_gep_const_offset:
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 0x4,
; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: ds_read_b32 v{{[0-9]+}}, [[VPTR]], 0x4,
define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = getelementptr i32 addrspace(3)* %in, i32 1
@@ -45,9 +45,9 @@ entry:
; Offset too large, can't fold into 16-bit immediate offset.
; FUNC-LABEL: {{^}}local_address_gep_large_const_offset:
; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; CHECK: DS_READ_B32 [[VPTR]]
; CHECK: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; CHECK: ds_read_b32 [[VPTR]]
define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = getelementptr i32 addrspace(3)* %in, i32 16385
@@ -57,8 +57,8 @@ entry:
}
; FUNC-LABEL: {{^}}null_32bit_lds_ptr:
; CHECK: V_CMP_NE_I32
; CHECK-NOT: V_CMP_NE_I32
; CHECK: v_cmp_ne_i32
; CHECK-NOT: v_cmp_ne_i32
; CHECK: V_CNDMASK_B32
define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) nounwind {
%cmp = icmp ne i32 addrspace(3)* %lds, null
@@ -68,9 +68,9 @@ define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds)
}
; FUNC-LABEL: {{^}}mul_32bit_ptr:
; CHECK: V_MUL_LO_I32
; CHECK-NEXT: V_ADD_I32_e32
; CHECK-NEXT: DS_READ_B32
; CHECK: v_mul_lo_i32
; CHECK-NEXT: v_add_i32_e32
; CHECK-NEXT: ds_read_b32
define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %lds, i32 %tid) {
%ptr = getelementptr [3 x float] addrspace(3)* %lds, i32 %tid, i32 0
%val = load float addrspace(3)* %ptr
@@ -81,8 +81,8 @@ define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %
@g_lds = addrspace(3) global float zeroinitializer, align 4
; FUNC-LABEL: {{^}}infer_ptr_alignment_global_offset:
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
; CHECK: DS_READ_B32 v{{[0-9]+}}, [[REG]]
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; CHECK: ds_read_b32 v{{[0-9]+}}, [[REG]]
define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) {
%val = load float addrspace(3)* @g_lds
store float %val, float addrspace(1)* %out
@@ -94,23 +94,23 @@ define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %ti
@dst = addrspace(3) global [16384 x i32] zeroinitializer
; FUNC-LABEL: {{^}}global_ptr:
; CHECK: DS_WRITE_B32
; CHECK: ds_write_b32
define void @global_ptr() nounwind {
store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
ret void
}
; FUNC-LABEL: {{^}}local_address_store:
; CHECK: DS_WRITE_B32
; CHECK: ds_write_b32
define void @local_address_store(i32 addrspace(3)* %out, i32 %val) {
store i32 %val, i32 addrspace(3)* %out
ret void
}
; FUNC-LABEL: {{^}}local_address_gep_store:
; CHECK: S_ADD_I32 [[SADDR:s[0-9]+]],
; CHECK: V_MOV_B32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
; CHECK: DS_WRITE_B32 [[ADDR]], v{{[0-9]+}},
; CHECK: s_add_i32 [[SADDR:s[0-9]+]],
; CHECK: v_mov_b32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
; CHECK: ds_write_b32 [[ADDR]], v{{[0-9]+}},
define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 %offset) {
%gep = getelementptr i32 addrspace(3)* %out, i32 %offset
store i32 %val, i32 addrspace(3)* %gep, align 4
@@ -118,9 +118,9 @@ define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32
}
; FUNC-LABEL: {{^}}local_address_gep_const_offset_store:
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: V_MOV_B32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 0x4
; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: v_mov_b32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
; CHECK: ds_write_b32 [[VPTR]], [[VAL]], 0x4
define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
%gep = getelementptr i32 addrspace(3)* %out, i32 1
store i32 %val, i32 addrspace(3)* %gep, align 4
@@ -129,9 +129,9 @@ define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %v
; Offset too large, can't fold into 16-bit immediate offset.
; FUNC-LABEL: {{^}}local_address_gep_large_const_offset_store:
; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; CHECK: DS_WRITE_B32 [[VPTR]], v{{[0-9]+}}, 0
; CHECK: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
; CHECK: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; CHECK: ds_write_b32 [[VPTR]], v{{[0-9]+}}, 0
define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
%gep = getelementptr i32 addrspace(3)* %out, i32 16385
store i32 %val, i32 addrspace(3)* %gep, align 4

View File

@@ -1,9 +1,9 @@
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
; SI-CHECK: {{^}}f64_kernel_arg:
; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
; SI-CHECK: BUFFER_STORE_DWORDX2
; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
; SI-CHECK: buffer_store_dwordx2
define void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
entry:
store double %in, double addrspace(1)* %out

View File

@@ -4,9 +4,9 @@
;FUNC-LABEL: {{^}}test1:
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: V_ADD_I32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
;SI-CHECK: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
;SI-CHECK-NOT: [[REG]]
;SI-CHECK: BUFFER_STORE_DWORD [[REG]],
;SI-CHECK: buffer_store_dword [[REG]],
define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
%a = load i32 addrspace(1)* %in
@@ -20,8 +20,8 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -38,10 +38,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -61,14 +61,14 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
define void @test8(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
entry:
%0 = add <8 x i32> %a, %b
@@ -93,22 +93,22 @@ entry:
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: S_ADD_I32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
; SI-CHECK: s_add_i32
define void @test16(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
entry:
%0 = add <16 x i32> %a, %b
@@ -117,8 +117,8 @@ entry:
}
; FUNC-LABEL: {{^}}add64:
; SI-CHECK: S_ADD_U32
; SI-CHECK: S_ADDC_U32
; SI-CHECK: s_add_u32
; SI-CHECK: s_addc_u32
define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = add i64 %a, %b
@@ -126,13 +126,13 @@ entry:
ret void
}
; The V_ADDC_U32 and V_ADD_I32 instruction can't read SGPRs, because they
; The v_addc_u32 and v_add_i32 instruction can't read SGPRs, because they
; use VCC. The test is designed so that %a will be stored in an SGPR and
; %0 will be stored in a VGPR, so the comiler will be forced to copy %a
; to a VGPR before doing the add.
; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
; SI-CHECK-NOT: V_ADDC_U32_e32 s
; SI-CHECK-NOT: v_addc_u32_e32 s
define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
entry:
%0 = load i64 addrspace(1)* %in
@@ -143,8 +143,8 @@ entry:
; Test i64 add inside a branch.
; FUNC-LABEL: {{^}}add64_in_branch:
; SI-CHECK: S_ADD_U32
; SI-CHECK: S_ADDC_U32
; SI-CHECK: s_add_u32
; SI-CHECK: s_addc_u32
define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
%0 = icmp eq i64 %a, 0

View File

@@ -4,8 +4,8 @@
declare i32 @llvm.r600.read.tidig.x() readnone
; SI-LABEL: {{^}}test_i64_vreg:
; SI: V_ADD_I32
; SI: V_ADDC_U32
; SI: v_add_i32
; SI: v_addc_u32
define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
%a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid
@@ -19,8 +19,8 @@ define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
; Check that the SGPR add operand is correctly moved to a VGPR.
; SI-LABEL: {{^}}sgpr_operand:
; SI: V_ADD_I32
; SI: V_ADDC_U32
; SI: v_add_i32
; SI: v_addc_u32
define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
%foo = load i64 addrspace(1)* %in, align 8
%result = add i64 %foo, %a
@@ -32,8 +32,8 @@ define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noal
; SGPR as other operand.
;
; SI-LABEL: {{^}}sgpr_operand_reversed:
; SI: V_ADD_I32
; SI: V_ADDC_U32
; SI: v_add_i32
; SI: v_addc_u32
define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
%foo = load i64 addrspace(1)* %in, align 8
%result = add i64 %a, %foo
@@ -43,10 +43,10 @@ define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace
; SI-LABEL: {{^}}test_v2i64_sreg:
; SI: S_ADD_U32
; SI: S_ADDC_U32
; SI: S_ADD_U32
; SI: S_ADDC_U32
; SI: s_add_u32
; SI: s_addc_u32
; SI: s_add_u32
; SI: s_addc_u32
define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a, <2 x i64> %b) {
%result = add <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
@@ -54,10 +54,10 @@ define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a,
}
; SI-LABEL: {{^}}test_v2i64_vreg:
; SI: V_ADD_I32
; SI: V_ADDC_U32
; SI: V_ADD_I32
; SI: V_ADDC_U32
; SI: v_add_i32
; SI: v_addc_u32
; SI: v_add_i32
; SI: v_addc_u32
define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
%a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid
@@ -70,12 +70,12 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add
}
; SI-LABEL: {{^}}trunc_i64_add_to_i32:
; SI: S_LOAD_DWORD s[[SREG0:[0-9]+]]
; SI: S_LOAD_DWORD s[[SREG1:[0-9]+]]
; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
; SI-NOT: ADDC
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
; SI: s_load_dword s[[SREG0:[0-9]+]]
; SI: s_load_dword s[[SREG1:[0-9]+]]
; SI: s_add_i32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
; SI-NOT: addc
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
define void @trunc_i64_add_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
%add = add i64 %b, %a
%trunc = trunc i64 %add to i32

View File

@@ -8,10 +8,10 @@
; already in a VGPR after the first read.
; CHECK-LABEL: {{^}}do_as_ptr_calcs:
; CHECK: S_LOAD_DWORD [[SREG1:s[0-9]+]],
; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, [[VREG1]] offset:12
; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:20
; CHECK: s_load_dword [[SREG1:s[0-9]+]],
; CHECK: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG1]] offset:12
; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
%x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0

View File

@@ -5,8 +5,8 @@
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -23,10 +23,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -38,7 +38,7 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
}
; FUNC-LABEL: {{^}}s_and_i32:
; SI: S_AND_B32
; SI: s_and_b32
define void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%and = and i32 %a, %b
store i32 %and, i32 addrspace(1)* %out, align 4
@@ -46,7 +46,7 @@ define void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
}
; FUNC-LABEL: {{^}}s_and_constant_i32:
; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
%and = and i32 %a, 1234567
store i32 %and, i32 addrspace(1)* %out, align 4
@@ -54,7 +54,7 @@ define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
}
; FUNC-LABEL: {{^}}v_and_i32:
; SI: V_AND_B32
; SI: v_and_b32
define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
%a = load i32 addrspace(1)* %aptr, align 4
%b = load i32 addrspace(1)* %bptr, align 4
@@ -64,7 +64,7 @@ define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addr
}
; FUNC-LABEL: {{^}}v_and_constant_i32:
; SI: V_AND_B32
; SI: v_and_b32
define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%a = load i32 addrspace(1)* %aptr, align 4
%and = and i32 %a, 1234567
@@ -73,7 +73,7 @@ define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr)
}
; FUNC-LABEL: {{^}}s_and_i64:
; SI: S_AND_B64
; SI: s_and_b64
define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%and = and i64 %a, %b
store i64 %and, i64 addrspace(1)* %out, align 8
@@ -82,7 +82,7 @@ define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
; FIXME: Should use SGPRs
; FUNC-LABEL: {{^}}s_and_i1:
; SI: V_AND_B32
; SI: v_and_b32
define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
%and = and i1 %a, %b
store i1 %and, i1 addrspace(1)* %out
@@ -90,7 +90,7 @@ define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
}
; FUNC-LABEL: {{^}}s_and_constant_i64:
; SI: S_AND_B64
; SI: s_and_b64
define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
%and = and i64 %a, 281474976710655
store i64 %and, i64 addrspace(1)* %out, align 8
@@ -98,8 +98,8 @@ define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
}
; FUNC-LABEL: {{^}}v_and_i64:
; SI: V_AND_B32
; SI: V_AND_B32
; SI: v_and_b32
; SI: v_and_b32
define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
%a = load i64 addrspace(1)* %aptr, align 8
%b = load i64 addrspace(1)* %bptr, align 8
@@ -109,8 +109,8 @@ define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addr
}
; FUNC-LABEL: {{^}}v_and_i64_br:
; SI: V_AND_B32
; SI: V_AND_B32
; SI: v_and_b32
; SI: v_and_b32
define void @v_and_i64_br(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i32 %cond) {
entry:
%tmp0 = icmp eq i32 %cond, 0
@@ -129,8 +129,8 @@ endif:
}
; FUNC-LABEL: {{^}}v_and_constant_i64:
; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, 1234567
@@ -140,8 +140,8 @@ define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr)
; FIXME: Replace and 0 with mov 0
; FUNC-LABEL: {{^}}v_and_inline_imm_i64:
; SI: V_AND_B32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
; SI: V_AND_B32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, 64
@@ -150,7 +150,7 @@ define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %apt
}
; FUNC-LABEL: {{^}}s_and_inline_imm_i64:
; SI: S_AND_B64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 64
store i64 %and, i64 addrspace(1)* %out, align 8

View File

@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
; CHECK-LABEL: {{^}}anyext_i1_i32:
; CHECK: V_CNDMASK_B32_e64
; CHECK: v_cndmask_b32_e64
define void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) {
entry:
%0 = icmp eq i32 %cond, 0

View File

@@ -14,16 +14,16 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
; FIXME: We end up with zero argument for ADD, because
; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
; with the appropriate offset. We should fold this into the store.
; SI-ALLOCA: V_ADD_I32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
; SI-ALLOCA: BUFFER_STORE_DWORD {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
;
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
; alloca to a vector. It currently fails because it does not know how
; to interpret:
; getelementptr [4 x i32]* %alloca, i32 1, i32 %b
; SI-PROMOTE: V_ADD_I32_e32 [[PTRREG:v[0-9]+]], 16
; SI-PROMOTE: DS_WRITE_B32 [[PTRREG]]
; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], 16
; SI-PROMOTE: ds_write_b32 [[PTRREG]]
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%alloca = alloca [4 x i32], i32 4, align 16
%tid = call i32 @llvm.SI.tid() readnone

View File

@@ -3,8 +3,8 @@
declare i32 @llvm.SI.tid() readnone
; SI-LABEL: {{^}}test_array_ptr_calc:
; SI: V_MUL_LO_I32
; SI: V_MUL_HI_I32
; SI: v_mul_lo_i32
; SI: v_mul_hi_i32
define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.SI.tid() readnone
%a_ptr = getelementptr [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0

View File

@@ -2,13 +2,13 @@
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: S_LOAD_DWORD [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
; SI: DS_CMPST_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
; SI: S_ENDPGM
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
; SI: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
; SI: s_endpgm
define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
@@ -18,17 +18,17 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset:
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: S_LOAD_DWORDX2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI: S_MOV_B64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
; SI-DAG: V_MOV_B32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
; SI-DAG: V_MOV_B32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: V_MOV_B32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
; SI-DAG: V_MOV_B32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
; SI: DS_CMPST_RTN_B64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
; SI: S_ENDPGM
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI: s_mov_b64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
; SI: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
; SI: buffer_store_dwordx2 [[RESULT]],
; SI: s_endpgm
define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
@@ -38,9 +38,9 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset
; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
; SI: S_ENDPGM
; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
; SI: s_endpgm
define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
@@ -52,13 +52,13 @@ define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i3
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset:
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; SI: S_LOAD_DWORD [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
; SI: DS_CMPST_B32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
; SI: S_ENDPGM
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
; SI: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
; SI: s_endpgm
define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
@@ -67,16 +67,16 @@ define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %sw
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset:
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; SI: S_LOAD_DWORDX2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: S_MOV_B64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
; SI-DAG: V_MOV_B32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
; SI-DAG: V_MOV_B32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: V_MOV_B32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
; SI-DAG: V_MOV_B32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
; SI: DS_CMPST_B64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
; SI: S_ENDPGM
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_mov_b64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
; SI: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
; SI: s_endpgm
define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic

View File

@@ -3,7 +3,7 @@
; FUNC-LABEL: {{^}}atomic_add_local:
; R600: LDS_ADD *
; SI: DS_ADD_U32
; SI: ds_add_u32
define void @atomic_add_local(i32 addrspace(3)* %local) {
%unused = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
ret void
@@ -11,7 +11,7 @@ define void @atomic_add_local(i32 addrspace(3)* %local) {
; FUNC-LABEL: {{^}}atomic_add_local_const_offset:
; R600: LDS_ADD *
; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 4
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
@@ -20,7 +20,7 @@ define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
; FUNC-LABEL: {{^}}atomic_add_ret_local:
; R600: LDS_ADD_RET *
; SI: DS_ADD_RTN_U32
; SI: ds_add_rtn_u32
define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%val = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
store i32 %val, i32 addrspace(1)* %out
@@ -29,7 +29,7 @@ define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %loc
; FUNC-LABEL: {{^}}atomic_add_ret_local_const_offset:
; R600: LDS_ADD_RET *
; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 5
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst

View File

@@ -3,7 +3,7 @@
; FUNC-LABEL: {{^}}atomic_sub_local:
; R600: LDS_SUB *
; SI: DS_SUB_U32
; SI: ds_sub_u32
define void @atomic_sub_local(i32 addrspace(3)* %local) {
%unused = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
ret void
@@ -11,7 +11,7 @@ define void @atomic_sub_local(i32 addrspace(3)* %local) {
; FUNC-LABEL: {{^}}atomic_sub_local_const_offset:
; R600: LDS_SUB *
; SI: DS_SUB_U32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 4
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
@@ -20,7 +20,7 @@ define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
; FUNC-LABEL: {{^}}atomic_sub_ret_local:
; R600: LDS_SUB_RET *
; SI: DS_SUB_RTN_U32
; SI: ds_sub_rtn_u32
define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%val = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
store i32 %val, i32 addrspace(1)* %out
@@ -29,7 +29,7 @@ define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %loc
; FUNC-LABEL: {{^}}atomic_sub_ret_local_const_offset:
; R600: LDS_SUB_RET *
; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 5
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst

View File

@@ -7,7 +7,7 @@
; R600-CHECK: {{^}}bfi_def:
; R600-CHECK: BFI_INT
; SI-CHECK: @bfi_def
; SI-CHECK: V_BFI_B32
; SI-CHECK: v_bfi_b32
define void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
%0 = xor i32 %x, -1
@@ -23,7 +23,7 @@ entry:
; R600-CHECK: {{^}}bfi_sha256_ch:
; R600-CHECK: BFI_INT
; SI-CHECK: @bfi_sha256_ch
; SI-CHECK: V_BFI_B32
; SI-CHECK: v_bfi_b32
define void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
%0 = xor i32 %y, %z
@@ -38,8 +38,8 @@ entry:
; R600-CHECK: {{^}}bfi_sha256_ma:
; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
; SI-CHECK: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
; SI-CHECK: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:

View File

@@ -5,7 +5,7 @@
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; FUNC-LABEL: {{^}}v32i8_to_v8i32:
; SI: S_ENDPGM
; SI: s_endpgm
define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
entry:
%1 = load <32 x i8> addrspace(2)* %0
@@ -18,7 +18,7 @@ entry:
}
; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
; SI: S_ENDPGM
; SI: s_endpgm
define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
@@ -56,7 +56,7 @@ define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nou
}
; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
; SI: S_ENDPGM
; SI: s_endpgm
define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%val = load <2 x i32> addrspace(1)* %in, align 8
%add = add <2 x i32> %val, <i32 4, i32 9>
@@ -66,7 +66,7 @@ define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace
}
; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
; SI: S_ENDPGM
; SI: s_endpgm
define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
%val = load double addrspace(1)* %in, align 8
%add = fadd double %val, 4.0

View File

@@ -9,13 +9,13 @@ declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) nounwind readnone
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone
; FUNC-LABEL: @test_bswap_i32
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
; SI-DAG: V_ALIGNBIT_B32 [[TMP0:v[0-9]+]], [[VAL]], [[VAL]], 8
; SI-DAG: V_ALIGNBIT_B32 [[TMP1:v[0-9]+]], [[VAL]], [[VAL]], 24
; SI-DAG: S_MOV_B32 [[K:s[0-9]+]], 0xff00ff
; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[K]], [[TMP1]], [[TMP0]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_alignbit_b32 [[TMP0:v[0-9]+]], [[VAL]], [[VAL]], 8
; SI-DAG: v_alignbit_b32 [[TMP1:v[0-9]+]], [[VAL]], [[VAL]], 24
; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0xff00ff
; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[K]], [[TMP1]], [[TMP0]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
@@ -24,13 +24,13 @@ define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}
; FUNC-LABEL: @test_bswap_v2i32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_BFI_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_BFI_B32
; SI: S_ENDPGM
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
%val = load <2 x i32> addrspace(1)* %in, align 8
%bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
@@ -39,19 +39,19 @@ define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(
}
; FUNC-LABEL: @test_bswap_v4i32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_BFI_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_BFI_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_BFI_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_BFI_B32
; SI: S_ENDPGM
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
%val = load <4 x i32> addrspace(1)* %in, align 16
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
@@ -60,31 +60,31 @@ define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(
}
; FUNC-LABEL: @test_bswap_v8i32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_BFI_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_BFI_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_BFI_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_BFI_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_BFI_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_BFI_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_BFI_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_ALIGNBIT_B32
; SI-DAG: V_BFI_B32
; SI: S_ENDPGM
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
%val = load <8 x i32> addrspace(1)* %in, align 32
%bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone

View File

@@ -6,9 +6,9 @@
; R600-CHECK: MOV
; R600-CHECK-NOT: MOV
; SI-CHECK: {{^}}build_vector2:
; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]]], 5
; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]]], 6
; SI-CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[X]]:[[Y]]{{\]}}
; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
; SI-CHECK: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}}
define void @build_vector2 (<2 x i32> addrspace(1)* %out) {
entry:
store <2 x i32> <i32 5, i32 6>, <2 x i32> addrspace(1)* %out
@@ -22,11 +22,11 @@ entry:
; R600-CHECK: MOV
; R600-CHECK-NOT: MOV
; SI-CHECK: {{^}}build_vector4:
; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]]], 5
; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]]], 6
; SI-CHECK-DAG: V_MOV_B32_e32 v[[Z:[0-9]]], 7
; SI-CHECK-DAG: V_MOV_B32_e32 v[[W:[0-9]]], 8
; SI-CHECK: BUFFER_STORE_DWORDX4 v{{\[}}[[X]]:[[W]]{{\]}}
; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
; SI-CHECK-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7
; SI-CHECK-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8
; SI-CHECK: buffer_store_dwordx4 v{{\[}}[[X]]:[[W]]{{\]}}
define void @build_vector4 (<4 x i32> addrspace(1)* %out) {
entry:
store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out

View File

@@ -8,8 +8,8 @@ target triple = "r600--"
; OPT: mul nsw i32
; OPT-NEXT: sext
; SI-LLC-LABEL: {{^}}test:
; SI-LLC: S_MUL_I32
; SI-LLC-NOT: MUL
; SI-LLC: s_mul_i32
; SI-LLC-NOT: mul
define void @test(i8 addrspace(1)* nocapture readonly %in, i32 %a, i8 %b) {
entry:
%0 = mul nsw i32 %a, 3

View File

@@ -4,9 +4,9 @@ declare i32 @llvm.r600.read.tidig.x() #1
declare float @llvm.fabs.f32(float) #1
; FUNC-LABEL: @commute_add_imm_fabs_f32
; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: V_ADD_F32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
; SI-NEXT: BUFFER_STORE_DWORD [[REG]]
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
@@ -18,9 +18,9 @@ define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(
}
; FUNC-LABEL: @commute_mul_imm_fneg_fabs_f32
; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: V_MUL_F32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
; SI-NEXT: BUFFER_STORE_DWORD [[REG]]
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
@@ -33,9 +33,9 @@ define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrs
}
; FUNC-LABEL: @commute_mul_imm_fneg_f32
; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: V_MUL_F32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
; SI-NEXT: BUFFER_STORE_DWORD [[REG]]
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid

View File

@@ -5,8 +5,8 @@
; instructions that access scratch memory. Bit 23, which is the add_tid_enable
; bit, is only set for scratch access, so we can check for the absence of this
; value if we want to ensure scratch memory is not being used.
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1i32(<2 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
%concat = shufflevector <1 x i32> %a, <1 x i32> %b, <2 x i32> <i32 0, i32 1>
store <2 x i32> %concat, <2 x i32> addrspace(1)* %out, align 8
@@ -14,8 +14,8 @@ define void @test_concat_v1i32(<2 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x
}
; FUNC-LABEL: {{^}}test_concat_v2i32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2i32(<4 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%concat = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i32> %concat, <4 x i32> addrspace(1)* %out, align 16
@@ -23,8 +23,8 @@ define void @test_concat_v2i32(<4 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x
}
; FUNC-LABEL: {{^}}test_concat_v4i32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4i32(<8 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
%concat = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i32> %concat, <8 x i32> addrspace(1)* %out, align 32
@@ -32,8 +32,8 @@ define void @test_concat_v4i32(<8 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x
}
; FUNC-LABEL: {{^}}test_concat_v8i32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8i32(<16 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind {
%concat = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i32> %concat, <16 x i32> addrspace(1)* %out, align 64
@@ -41,8 +41,8 @@ define void @test_concat_v8i32(<16 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x
}
; FUNC-LABEL: {{^}}test_concat_v16i32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16i32(<32 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) nounwind {
%concat = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i32> %concat, <32 x i32> addrspace(1)* %out, align 128
@@ -50,8 +50,8 @@ define void @test_concat_v16i32(<32 x i32> addrspace(1)* %out, <16 x i32> %a, <1
}
; FUNC-LABEL: {{^}}test_concat_v1f32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1f32(<2 x float> addrspace(1)* %out, <1 x float> %a, <1 x float> %b) nounwind {
%concat = shufflevector <1 x float> %a, <1 x float> %b, <2 x i32> <i32 0, i32 1>
store <2 x float> %concat, <2 x float> addrspace(1)* %out, align 8
@@ -59,8 +59,8 @@ define void @test_concat_v1f32(<2 x float> addrspace(1)* %out, <1 x float> %a, <
}
; FUNC-LABEL: {{^}}test_concat_v2f32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2f32(<4 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%concat = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x float> %concat, <4 x float> addrspace(1)* %out, align 16
@@ -68,8 +68,8 @@ define void @test_concat_v2f32(<4 x float> addrspace(1)* %out, <2 x float> %a, <
}
; FUNC-LABEL: {{^}}test_concat_v4f32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4f32(<8 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%concat = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x float> %concat, <8 x float> addrspace(1)* %out, align 32
@@ -77,8 +77,8 @@ define void @test_concat_v4f32(<8 x float> addrspace(1)* %out, <4 x float> %a, <
}
; FUNC-LABEL: {{^}}test_concat_v8f32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8f32(<16 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%concat = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x float> %concat, <16 x float> addrspace(1)* %out, align 64
@@ -86,8 +86,8 @@ define void @test_concat_v8f32(<16 x float> addrspace(1)* %out, <8 x float> %a,
}
; FUNC-LABEL: {{^}}test_concat_v16f32:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16f32(<32 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%concat = shufflevector <16 x float> %a, <16 x float> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x float> %concat, <32 x float> addrspace(1)* %out, align 128
@@ -95,8 +95,8 @@ define void @test_concat_v16f32(<32 x float> addrspace(1)* %out, <16 x float> %a
}
; FUNC-LABEL: {{^}}test_concat_v1i64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1i64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
%concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> <i32 0, i32 1>
store <2 x double> %concat, <2 x double> addrspace(1)* %out, align 16
@@ -104,8 +104,8 @@ define void @test_concat_v1i64(<2 x double> addrspace(1)* %out, <1 x double> %a,
}
; FUNC-LABEL: {{^}}test_concat_v2i64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2i64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x double> %concat, <4 x double> addrspace(1)* %out, align 32
@@ -113,8 +113,8 @@ define void @test_concat_v2i64(<4 x double> addrspace(1)* %out, <2 x double> %a,
}
; FUNC-LABEL: {{^}}test_concat_v4i64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4i64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x double> %concat, <8 x double> addrspace(1)* %out, align 64
@@ -122,8 +122,8 @@ define void @test_concat_v4i64(<8 x double> addrspace(1)* %out, <4 x double> %a,
}
; FUNC-LABEL: {{^}}test_concat_v8i64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8i64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x double> %concat, <16 x double> addrspace(1)* %out, align 128
@@ -131,8 +131,8 @@ define void @test_concat_v8i64(<16 x double> addrspace(1)* %out, <8 x double> %a
}
; FUNC-LABEL: {{^}}test_concat_v16i64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16i64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x double> %concat, <32 x double> addrspace(1)* %out, align 256
@@ -140,8 +140,8 @@ define void @test_concat_v16i64(<32 x double> addrspace(1)* %out, <16 x double>
}
; FUNC-LABEL: {{^}}test_concat_v1f64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1f64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
%concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> <i32 0, i32 1>
store <2 x double> %concat, <2 x double> addrspace(1)* %out, align 16
@@ -149,8 +149,8 @@ define void @test_concat_v1f64(<2 x double> addrspace(1)* %out, <1 x double> %a,
}
; FUNC-LABEL: {{^}}test_concat_v2f64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2f64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x double> %concat, <4 x double> addrspace(1)* %out, align 32
@@ -158,8 +158,8 @@ define void @test_concat_v2f64(<4 x double> addrspace(1)* %out, <2 x double> %a,
}
; FUNC-LABEL: {{^}}test_concat_v4f64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4f64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x double> %concat, <8 x double> addrspace(1)* %out, align 64
@@ -167,8 +167,8 @@ define void @test_concat_v4f64(<8 x double> addrspace(1)* %out, <4 x double> %a,
}
; FUNC-LABEL: {{^}}test_concat_v8f64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8f64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x double> %concat, <16 x double> addrspace(1)* %out, align 128
@@ -176,8 +176,8 @@ define void @test_concat_v8f64(<16 x double> addrspace(1)* %out, <8 x double> %a
}
; FUNC-LABEL: {{^}}test_concat_v16f64:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16f64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x double> %concat, <32 x double> addrspace(1)* %out, align 256
@@ -185,8 +185,8 @@ define void @test_concat_v16f64(<32 x double> addrspace(1)* %out, <16 x double>
}
; FUNC-LABEL: {{^}}test_concat_v1i1:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1i1(<2 x i1> addrspace(1)* %out, <1 x i1> %a, <1 x i1> %b) nounwind {
%concat = shufflevector <1 x i1> %a, <1 x i1> %b, <2 x i32> <i32 0, i32 1>
store <2 x i1> %concat, <2 x i1> addrspace(1)* %out
@@ -194,8 +194,8 @@ define void @test_concat_v1i1(<2 x i1> addrspace(1)* %out, <1 x i1> %a, <1 x i1>
}
; FUNC-LABEL: {{^}}test_concat_v2i1:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2i1(<4 x i1> addrspace(1)* %out, <2 x i1> %a, <2 x i1> %b) nounwind {
%concat = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i1> %concat, <4 x i1> addrspace(1)* %out
@@ -203,8 +203,8 @@ define void @test_concat_v2i1(<4 x i1> addrspace(1)* %out, <2 x i1> %a, <2 x i1>
}
; FUNC-LABEL: {{^}}test_concat_v4i1:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4i1(<8 x i1> addrspace(1)* %out, <4 x i1> %a, <4 x i1> %b) nounwind {
%concat = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i1> %concat, <8 x i1> addrspace(1)* %out
@@ -212,8 +212,8 @@ define void @test_concat_v4i1(<8 x i1> addrspace(1)* %out, <4 x i1> %a, <4 x i1>
}
; FUNC-LABEL: {{^}}test_concat_v8i1:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8i1(<16 x i1> addrspace(1)* %out, <8 x i1> %a, <8 x i1> %b) nounwind {
%concat = shufflevector <8 x i1> %a, <8 x i1> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i1> %concat, <16 x i1> addrspace(1)* %out
@@ -221,8 +221,8 @@ define void @test_concat_v8i1(<16 x i1> addrspace(1)* %out, <8 x i1> %a, <8 x i1
}
; FUNC-LABEL: {{^}}test_concat_v16i1:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16i1(<32 x i1> addrspace(1)* %out, <16 x i1> %a, <16 x i1> %b) nounwind {
%concat = shufflevector <16 x i1> %a, <16 x i1> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i1> %concat, <32 x i1> addrspace(1)* %out
@@ -230,8 +230,8 @@ define void @test_concat_v16i1(<32 x i1> addrspace(1)* %out, <16 x i1> %a, <16 x
}
; FUNC-LABEL: {{^}}test_concat_v32i1:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v32i1(<64 x i1> addrspace(1)* %out, <32 x i1> %a, <32 x i1> %b) nounwind {
%concat = shufflevector <32 x i1> %a, <32 x i1> %b, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
store <64 x i1> %concat, <64 x i1> addrspace(1)* %out
@@ -239,8 +239,8 @@ define void @test_concat_v32i1(<64 x i1> addrspace(1)* %out, <32 x i1> %a, <32 x
}
; FUNC-LABEL: {{^}}test_concat_v1i16:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v1i16(<2 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind {
%concat = shufflevector <1 x i16> %a, <1 x i16> %b, <2 x i32> <i32 0, i32 1>
store <2 x i16> %concat, <2 x i16> addrspace(1)* %out, align 4
@@ -248,8 +248,8 @@ define void @test_concat_v1i16(<2 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x
}
; FUNC-LABEL: {{^}}test_concat_v2i16:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v2i16(<4 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) nounwind {
%concat = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i16> %concat, <4 x i16> addrspace(1)* %out, align 8
@@ -257,8 +257,8 @@ define void @test_concat_v2i16(<4 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x
}
; FUNC-LABEL: {{^}}test_concat_v4i16:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v4i16(<8 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind {
%concat = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i16> %concat, <8 x i16> addrspace(1)* %out, align 16
@@ -266,8 +266,8 @@ define void @test_concat_v4i16(<8 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x
}
; FUNC-LABEL: {{^}}test_concat_v8i16:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v8i16(<16 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind {
%concat = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i16> %concat, <16 x i16> addrspace(1)* %out, align 32
@@ -275,8 +275,8 @@ define void @test_concat_v8i16(<16 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x
}
; FUNC-LABEL: {{^}}test_concat_v16i16:
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
; SI-NOT: MOVREL
; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
; SI-NOT: movrel
define void @test_concat_v16i16(<32 x i16> addrspace(1)* %out, <16 x i16> %a, <16 x i16> %b) nounwind {
%concat = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i16> %concat, <32 x i16> addrspace(1)* %out, align 64

View File

@@ -1,9 +1,9 @@
; RUN: llc -march=r600 -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}test_copy_v4i8:
; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: S_ENDPGM
; SI: buffer_load_dword [[REG:v[0-9]+]]
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
@@ -11,10 +11,10 @@ define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)*
}
; FUNC-LABEL: {{^}}test_copy_v4i8_x2:
; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: S_ENDPGM
; SI: buffer_load_dword [[REG:v[0-9]+]]
; SI: buffer_store_dword [[REG]]
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
@@ -23,11 +23,11 @@ define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
}
; FUNC-LABEL: {{^}}test_copy_v4i8_x3:
; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: S_ENDPGM
; SI: buffer_load_dword [[REG:v[0-9]+]]
; SI: buffer_store_dword [[REG]]
; SI: buffer_store_dword [[REG]]
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
@@ -37,12 +37,12 @@ define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
}
; FUNC-LABEL: {{^}}test_copy_v4i8_x4:
; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: S_ENDPGM
; SI: buffer_load_dword [[REG:v[0-9]+]]
; SI: buffer_store_dword [[REG]]
; SI: buffer_store_dword [[REG]]
; SI: buffer_store_dword [[REG]]
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
@@ -53,33 +53,33 @@ define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
}
; FUNC-LABEL: {{^}}test_copy_v4i8_extra_use:
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI-DAG: V_ADD
; SI-DAG: V_ADD
; SI-DAG: V_ADD
; SI-DAG: V_ADD
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI_DAG: BUFFER_STORE_BYTE
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI-DAG: v_add
; SI-DAG: v_add
; SI-DAG: v_add
; SI-DAG: v_add
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI_DAG: buffer_store_byte
; After scalarizing v4i8 loads is fixed.
; XSI: BUFFER_LOAD_DWORD
; XSI: buffer_load_dword
; XSI: V_BFE
; XSI: V_ADD
; XSI: V_ADD
; XSI: V_ADD
; XSI: BUFFER_STORE_DWORD
; XSI: BUFFER_STORE_DWORD
; XSI: buffer_store_dword
; XSI: buffer_store_dword
; SI: S_ENDPGM
; SI: s_endpgm
define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
%add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
@@ -89,35 +89,35 @@ define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> add
}
; FUNC-LABEL: {{^}}test_copy_v4i8_x2_extra_use:
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI-DAG: V_ADD
; SI-DAG: V_ADD
; SI-DAG: V_ADD
; SI-DAG: V_ADD
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI_DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI-DAG: BUFFER_STORE_BYTE
; SI_DAG: BUFFER_STORE_BYTE
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI-DAG: v_add
; SI-DAG: v_add
; SI-DAG: v_add
; SI-DAG: v_add
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI_DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_byte
; SI_DAG: buffer_store_byte
; XSI: BUFFER_LOAD_DWORD
; XSI: buffer_load_dword
; XSI: BFE
; XSI: BUFFER_STORE_DWORD
; XSI: buffer_store_dword
; XSI: V_ADD
; XSI: BUFFER_STORE_DWORD
; XSI-NEXT: BUFFER_STORE_DWORD
; XSI: buffer_store_dword
; XSI-NEXT: buffer_store_dword
; SI: S_ENDPGM
; SI: s_endpgm
define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
%add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
@@ -128,9 +128,9 @@ define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8>
}
; FUNC-LABEL: {{^}}test_copy_v3i8:
; SI-NOT: BFE
; SI-NOT: BFI
; SI: S_ENDPGM
; SI-NOT: bfe
; SI-NOT: bfi
; SI: s_endpgm
define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
%val = load <3 x i8> addrspace(1)* %in, align 4
store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 4
@@ -138,11 +138,11 @@ define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)*
}
; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_load:
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: S_ENDPGM
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: s_endpgm
define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load volatile <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
@@ -150,15 +150,15 @@ define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8>
}
; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_store:
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_STORE_BYTE
; SI: BUFFER_STORE_BYTE
; SI: BUFFER_STORE_BYTE
; SI: BUFFER_STORE_BYTE
; SI: S_ENDPGM
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: s_endpgm
define void @test_copy_v4i8_volatile_store(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store volatile <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4

View File

@@ -6,11 +6,11 @@ declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i32:
; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
; SI: S_FLBIT_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
; SI: S_ENDPGM
; SI: s_load_dword [[VAL:s[0-9]+]],
; SI: s_flbit_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@@ -20,10 +20,10 @@ define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_FFBH_U32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
@@ -34,11 +34,11 @@ define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v2i32:
; SI: BUFFER_LOAD_DWORDX2
; SI: V_FFBH_U32_e32
; SI: V_FFBH_U32_e32
; SI: BUFFER_STORE_DWORDX2
; SI: S_ENDPGM
; SI: buffer_load_dwordx2
; SI: v_ffbh_u32_e32
; SI: v_ffbh_u32_e32
; SI: buffer_store_dwordx2
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
@@ -50,13 +50,13 @@ define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
}
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v4i32:
; SI: BUFFER_LOAD_DWORDX4
; SI: V_FFBH_U32_e32
; SI: V_FFBH_U32_e32
; SI: V_FFBH_U32_e32
; SI: V_FFBH_U32_e32
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
; SI: buffer_load_dwordx4
; SI: v_ffbh_u32_e32
; SI: v_ffbh_u32_e32
; SI: v_ffbh_u32_e32
; SI: v_ffbh_u32_e32
; SI: buffer_store_dwordx4
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]

View File

@@ -8,11 +8,11 @@ declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
; FUNC-LABEL: {{^}}s_ctpop_i32:
; SI: S_LOAD_DWORD [[SVAL:s[0-9]+]],
; SI: S_BCNT1_I32_B32 [[SRESULT:s[0-9]+]], [[SVAL]]
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
; SI: S_ENDPGM
; SI: s_load_dword [[SVAL:s[0-9]+]],
; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
; EG: BCNT_INT
define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@@ -23,11 +23,11 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
; XXX - Why 0 in register?
; FUNC-LABEL: {{^}}v_ctpop_i32:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@@ -38,13 +38,13 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
}
; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL0:v[0-9]+]],
; SI: buffer_load_dword [[VAL1:v[0-9]+]],
; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@@ -59,11 +59,11 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
}
; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
; SI-NEXT: S_WAITCNT
; SI-NEXT: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
; SI-NEXT: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL0:v[0-9]+]],
; SI-NEXT: s_waitcnt
; SI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
; SI-NEXT: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
%val0 = load i32 addrspace(1)* %in0, align 4
%ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
@@ -73,9 +73,9 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(
}
; FUNC-LABEL: {{^}}v_ctpop_v2i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@@ -87,11 +87,11 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v4i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@@ -105,15 +105,15 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v8i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@@ -131,23 +131,23 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v16i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: v_bcnt_u32_b32_e32
; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@@ -173,10 +173,10 @@ define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> ad
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@@ -188,10 +188,10 @@ define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@@ -203,11 +203,11 @@ define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out,
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_MOV_B32_e32 [[LIT:v[0-9]+]], 0x1869f
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@@ -217,11 +217,11 @@ define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspa
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
@@ -233,11 +233,11 @@ define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
@@ -249,11 +249,11 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:0x10
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
; SI-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:0x10
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
@@ -270,11 +270,11 @@ define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrsp
; but there are some cases when the should be allowed.
; FUNC-LABEL: {{^}}ctpop_i32_in_br:
; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
; SI: S_BCNT1_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[RESULT]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[RESULT]], [[SRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: BCNT_INT
define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
entry:

View File

@@ -7,11 +7,11 @@ declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>) nounwind readnone
declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone
; FUNC-LABEL: {{^}}s_ctpop_i64:
; SI: S_LOAD_DWORDX2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: S_BCNT1_I32_B64 [[SRESULT:s[0-9]+]], [[SVAL]]
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
; SI: S_ENDPGM
; SI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
%truncctpop = trunc i64 %ctpop to i32
@@ -20,12 +20,12 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
}
; FUNC-LABEL: {{^}}v_ctpop_i64:
; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%val = load i64 addrspace(1)* %in, align 8
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
@@ -35,9 +35,9 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali
}
; FUNC-LABEL: {{^}}s_ctpop_v2i64:
; SI: S_BCNT1_I32_B64
; SI: S_BCNT1_I32_B64
; SI: S_ENDPGM
; SI: s_bcnt1_i32_b64
; SI: s_bcnt1_i32_b64
; SI: s_endpgm
define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind {
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
%truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
@@ -46,11 +46,11 @@ define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val)
}
; FUNC-LABEL: {{^}}s_ctpop_v4i64:
; SI: S_BCNT1_I32_B64
; SI: S_BCNT1_I32_B64
; SI: S_BCNT1_I32_B64
; SI: S_BCNT1_I32_B64
; SI: S_ENDPGM
; SI: s_bcnt1_i32_b64
; SI: s_bcnt1_i32_b64
; SI: s_bcnt1_i32_b64
; SI: s_bcnt1_i32_b64
; SI: s_endpgm
define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind {
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
%truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
@@ -59,11 +59,11 @@ define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val)
}
; FUNC-LABEL: {{^}}v_ctpop_v2i64:
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: S_ENDPGM
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: s_endpgm
define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
%val = load <2 x i64> addrspace(1)* %in, align 16
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
@@ -73,15 +73,15 @@ define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v4i64:
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: V_BCNT_U32_B32
; SI: S_ENDPGM
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: v_bcnt_u32_b32
; SI: s_endpgm
define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
%val = load <4 x i64> addrspace(1)* %in, align 32
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
@@ -94,12 +94,12 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs
; but there are some cases when the should be allowed.
; FUNC-LABEL: {{^}}ctpop_i64_in_br:
; SI: S_LOAD_DWORDX2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
; SI: S_BCNT1_I32_B64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
; SI: V_MOV_B32_e32 v[[VLO:[0-9]+]], [[RESULT]]
; SI: V_MOV_B32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
; SI: S_ENDPGM
; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
; SI: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
; SI: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
; SI: s_endpgm
define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
entry:
%tmp0 = icmp eq i32 %cond, 0

View File

@@ -6,11 +6,11 @@ declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone
; FUNC-LABEL: {{^}}s_cttz_zero_undef_i32:
; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
; SI: S_FF1_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
; SI: S_ENDPGM
; SI: s_load_dword [[VAL:s[0-9]+]],
; SI: s_ff1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@@ -20,10 +20,10 @@ define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
}
; FUNC-LABEL: {{^}}v_cttz_zero_undef_i32:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_FFBL_B32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_ffbl_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
@@ -34,11 +34,11 @@ define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
}
; FUNC-LABEL: {{^}}v_cttz_zero_undef_v2i32:
; SI: BUFFER_LOAD_DWORDX2
; SI: V_FFBL_B32_e32
; SI: V_FFBL_B32_e32
; SI: BUFFER_STORE_DWORDX2
; SI: S_ENDPGM
; SI: buffer_load_dwordx2
; SI: v_ffbl_b32_e32
; SI: v_ffbl_b32_e32
; SI: buffer_store_dwordx2
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
@@ -50,13 +50,13 @@ define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
}
; FUNC-LABEL: {{^}}v_cttz_zero_undef_v4i32:
; SI: BUFFER_LOAD_DWORDX4
; SI: V_FFBL_B32_e32
; SI: V_FFBL_B32_e32
; SI: V_FFBL_B32_e32
; SI: V_FFBL_B32_e32
; SI: BUFFER_STORE_DWORDX4
; SI: S_ENDPGM
; SI: buffer_load_dwordx4
; SI: v_ffbl_b32_e32
; SI: v_ffbl_b32_e32
; SI: v_ffbl_b32_e32
; SI: v_ffbl_b32_e32
; SI: buffer_store_dwordx4
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]

View File

@@ -1,11 +1,11 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}load_i8_to_f32:
; SI: BUFFER_LOAD_UBYTE [[LOADREG:v[0-9]+]],
; SI-NOT: BFE
; SI-NOT: LSHR
; SI: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
; SI: BUFFER_STORE_DWORD [[CONV]],
; SI: buffer_load_ubyte [[LOADREG:v[0-9]+]],
; SI-NOT: bfe
; SI-NOT: lshr
; SI: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
; SI: buffer_store_dword [[CONV]],
define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
%load = load i8 addrspace(1)* %in, align 1
%cvt = uitofp i8 %load to float
@@ -14,13 +14,13 @@ define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* n
}
; SI-LABEL: {{^}}load_v2i8_to_v2f32:
; SI: BUFFER_LOAD_USHORT [[LOADREG:v[0-9]+]],
; SI-NOT: BFE
; SI-NOT: LSHR
; SI-NOT: AND
; SI-DAG: V_CVT_F32_UBYTE1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
; SI: buffer_load_ushort [[LOADREG:v[0-9]+]],
; SI-NOT: bfe
; SI-NOT: lshr
; SI-NOT: and
; SI-DAG: v_cvt_f32_ubyte1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <2 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <2 x i8> %load to <2 x float>
@@ -29,12 +29,12 @@ define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8>
}
; SI-LABEL: {{^}}load_v3i8_to_v3f32:
; SI-NOT: BFE
; SI-NOT: V_CVT_F32_UBYTE3_e32
; SI-DAG: V_CVT_F32_UBYTE2_e32
; SI-DAG: V_CVT_F32_UBYTE1_e32
; SI-DAG: V_CVT_F32_UBYTE0_e32
; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
; SI-NOT: bfe
; SI-NOT: v_cvt_f32_ubyte3_e32
; SI-DAG: v_cvt_f32_ubyte2_e32
; SI-DAG: v_cvt_f32_ubyte1_e32
; SI-DAG: v_cvt_f32_ubyte0_e32
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <3 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <3 x i8> %load to <3 x float>
@@ -43,18 +43,18 @@ define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8>
}
; SI-LABEL: {{^}}load_v4i8_to_v4f32:
; We can't use BUFFER_LOAD_DWORD here, because the load is byte aligned, and
; BUFFER_LOAD_DWORD requires dword alignment.
; SI: BUFFER_LOAD_USHORT
; SI: BUFFER_LOAD_USHORT
; SI: V_OR_B32_e32 [[LOADREG:v[0-9]+]]
; SI-NOT: BFE
; SI-NOT: LSHR
; SI-DAG: V_CVT_F32_UBYTE3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, [[LOADREG]]
; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, [[LOADREG]]
; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: BUFFER_STORE_DWORDX4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
; We can't use buffer_load_dword here, because the load is byte aligned, and
; buffer_load_dword requires dword alignment.
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: v_or_b32_e32 [[LOADREG:v[0-9]+]]
; SI-NOT: bfe
; SI-NOT: lshr
; SI-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, [[LOADREG]]
; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, [[LOADREG]]
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <4 x i8> %load to <4 x float>
@@ -62,27 +62,27 @@ define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8>
ret void
}
; XXX - This should really still be able to use the V_CVT_F32_UBYTE0
; XXX - This should really still be able to use the v_cvt_f32_ubyte0
; for each component, but computeKnownBits doesn't handle vectors very
; well.
; SI-LABEL: {{^}}load_v4i8_to_v4f32_2_uses:
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: BUFFER_LOAD_UBYTE
; SI: V_CVT_F32_UBYTE0_e32
; SI: V_CVT_F32_UBYTE0_e32
; SI: V_CVT_F32_UBYTE0_e32
; SI: V_CVT_F32_UBYTE0_e32
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: v_cvt_f32_ubyte0_e32
; SI: v_cvt_f32_ubyte0_e32
; SI: v_cvt_f32_ubyte0_e32
; SI: v_cvt_f32_ubyte0_e32
; XXX - replace with this when v4i8 loads aren't scalarized anymore.
; XSI: BUFFER_LOAD_DWORD
; XSI: V_CVT_F32_U32_e32
; XSI: V_CVT_F32_U32_e32
; XSI: V_CVT_F32_U32_e32
; XSI: V_CVT_F32_U32_e32
; SI: S_ENDPGM
; XSI: buffer_load_dword
; XSI: v_cvt_f32_u32_e32
; XSI: v_cvt_f32_u32_e32
; XSI: v_cvt_f32_u32_e32
; XSI: v_cvt_f32_u32_e32
; SI: s_endpgm
define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <4 x i8> %load to <4 x float>
@@ -94,7 +94,7 @@ define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <
; Make sure this doesn't crash.
; SI-LABEL: {{^}}load_v7i8_to_v7f32:
; SI: S_ENDPGM
; SI: s_endpgm
define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <7 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <7 x i8> %load to <7 x float>
@@ -103,27 +103,27 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
}
; SI-LABEL: {{^}}load_v8i8_to_v8f32:
; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
; SI-NOT: BFE
; SI-NOT: LSHR
; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-NOT: BFE
; SI-NOT: LSHR
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: buffer_load_dwordx2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
; SI-NOT: bfe
; SI-NOT: lshr
; SI-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-NOT: bfe
; SI-NOT: lshr
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <8 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <8 x i8> %load to <8 x float>
@@ -132,10 +132,10 @@ define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8>
}
; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
; SI: BUFFER_LOAD_DWORD [[LOADREG:v[0-9]+]],
; SI: V_ADD_I32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
; SI-NEXT: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[ADD]]
; SI: BUFFER_STORE_DWORD [[CONV]],
; SI: buffer_load_dword [[LOADREG:v[0-9]+]],
; SI: v_add_i32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
; SI: buffer_store_dword [[CONV]],
define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 2

View File

@@ -7,21 +7,21 @@ declare void @llvm.AMDGPU.barrier.local() #1
; Function Attrs: nounwind
; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop:
; CHECK: BB0_1:
; CHECK: V_ADD_I32_e32 [[VADDR:v[0-9]+]],
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]]
; SI-DAG: V_ADD_I32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR4]]
; SI-DAG: V_ADD_I32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x80]]
; SI-DAG: V_ADD_I32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x84]]
; SI-DAG: V_ADD_I32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR0x100]]
; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]],
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]]
; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR4]]
; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]]
; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x84]]
; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
; CI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
; CI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
; CI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]] offset:256
; CHECK: S_ENDPGM
; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256
; CHECK: s_endpgm
define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 {
entry:
%x.i = tail call i32 @llvm.r600.read.tidig.x() #0

View File

@@ -7,11 +7,11 @@
@lds.f64 = addrspace(3) global [512 x double] zeroinitializer, align 8
; SI-LABEL: @simple_read2_f32
; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2_f32(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -26,11 +26,11 @@ define void @simple_read2_f32(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f32_max_offset
; SI: DS_READ2_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -45,10 +45,10 @@ define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f32_too_far
; SI-NOT DS_READ2_B32
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
; SI: S_ENDPGM
; SI-NOT ds_read2_b32
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
; SI: s_endpgm
define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -63,9 +63,9 @@ define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f32_x2
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: S_ENDPGM
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 0
@@ -94,10 +94,10 @@ define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
; Make sure there is an instruction between the two sets of reads.
; SI-LABEL: @simple_read2_f32_x2_barrier
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
; SI: S_BARRIER
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: S_ENDPGM
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
; SI: s_barrier
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 0
@@ -130,9 +130,9 @@ define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
; element results in only folding the inner pair.
; SI-LABEL: @simple_read2_f32_x2_nonzero_base
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: S_ENDPGM
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -166,10 +166,10 @@ define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
; register. We can't safely merge this.
; SI-LABEL: @read2_ptr_is_subreg_arg_f32
; SI-NOT: DS_READ2_B32
; SI: DS_READ_B32
; SI: DS_READ_B32
; SI: S_ENDPGM
; SI-NOT: ds_read2_b32
; SI: ds_read_b32
; SI: ds_read_b32
; SI: s_endpgm
define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
@@ -192,10 +192,10 @@ define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float ad
; subregisters.
; SI-LABEL: @read2_ptr_is_subreg_arg_offset_f32
; SI-NOT: DS_READ2_B32
; SI: DS_READ_B32
; SI: DS_READ_B32
; SI: S_ENDPGM
; SI-NOT: ds_read2_b32
; SI: ds_read_b32
; SI: ds_read_b32
; SI: s_endpgm
define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
@@ -217,10 +217,10 @@ define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x f
}
; We should be able to merge in this case, but probably not worth the effort.
; SI-NOT: DS_READ2_B32
; SI: DS_READ_B32
; SI: DS_READ_B32
; SI: S_ENDPGM
; SI-NOT: ds_read2_b32
; SI: ds_read_b32
; SI: ds_read_b32
; SI: s_endpgm
define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%ptr.0 = insertelement <2 x [512 x float] addrspace(3)*> undef, [512 x float] addrspace(3)* @lds, i32 0
@@ -241,10 +241,10 @@ define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f32_volatile_0
; SI-NOT DS_READ2_B32
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
; SI: S_ENDPGM
; SI-NOT ds_read2_b32
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
; SI: s_endpgm
define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -259,10 +259,10 @@ define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f32_volatile_1
; SI-NOT DS_READ2_B32
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
; SI: S_ENDPGM
; SI-NOT ds_read2_b32
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
; SI: s_endpgm
define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -280,8 +280,8 @@ define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
; XXX: This isn't really testing anything useful now. I think CI
; allows unaligned LDS accesses, which would be a problem here.
; SI-LABEL: @unaligned_read2_f32
; SI-NOT: DS_READ2_B32
; SI: S_ENDPGM
; SI-NOT: ds_read2_b32
; SI: s_endpgm
define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
@@ -296,8 +296,8 @@ define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %
}
; SI-LABEL: @misaligned_2_simple_read2_f32
; SI-NOT: DS_READ2_B32
; SI: S_ENDPGM
; SI-NOT: ds_read2_b32
; SI: s_endpgm
define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
@@ -312,11 +312,11 @@ define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrs
}
; SI-LABEL: @simple_read2_f64
; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
; SI: DS_READ2_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
; SI: S_ENDPGM
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2_f64(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
@@ -331,8 +331,8 @@ define void @simple_read2_f64(double addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f64_max_offset
; SI: DS_READ2_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
; SI: S_ENDPGM
; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
; SI: s_endpgm
define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
@@ -347,10 +347,10 @@ define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2_f64_too_far
; SI-NOT DS_READ2_B64
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056
; SI: S_ENDPGM
; SI-NOT ds_read2_b64
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056
; SI: s_endpgm
define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
@@ -366,9 +366,9 @@ define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
; Alignment only 4
; SI-LABEL: @misaligned_read2_f64
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
; SI: S_ENDPGM
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
; SI: s_endpgm
define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
@@ -385,8 +385,8 @@ define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)
@foo = addrspace(3) global [4 x i32] zeroinitializer, align 4
; SI-LABEL: @load_constant_adjacent_offsets
; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
%val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
%val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
@@ -396,8 +396,8 @@ define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
}
; SI-LABEL: @load_constant_disjoint_offsets
; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
%val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
%val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
@@ -409,9 +409,9 @@ define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
@bar = addrspace(3) global [4 x i64] zeroinitializer, align 4
; SI-LABEL: @load_misaligned64_constant_offsets
; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
%val0 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
%val1 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
@@ -423,11 +423,11 @@ define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
@bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4
; SI-LABEL: @load_misaligned64_constant_large_offsets
; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000
; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
; SI: S_ENDPGM
; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000
; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
; SI: s_endpgm
define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
%val0 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
%val1 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4

View File

@@ -5,11 +5,11 @@
; SI-LABEL: @simple_read2st64_f32_0_1
; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -24,11 +24,11 @@ define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2st64_f32_1_2
; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
@@ -44,11 +44,11 @@ define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(
}
; SI-LABEL: @simple_read2st64_f32_max_offset
; SI: DS_READ2ST64_B32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
@@ -64,11 +64,11 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add
}
; SI-LABEL: @simple_read2st64_f32_over_max_offset
; SI-NOT: DS_READ2ST64_B32
; SI: DS_READ_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: V_ADD_I32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
; SI: DS_READ_B32 {{v[0-9]+}}, [[BIGADD]]
; SI: S_ENDPGM
; SI-NOT: ds_read2st64_b32
; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
@@ -84,8 +84,8 @@ define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, floa
}
; SI-LABEL: @odd_invalid_read2st64_f32_0
; SI-NOT: DS_READ2ST64_B32
; SI: S_ENDPGM
; SI-NOT: ds_read2st64_b32
; SI: s_endpgm
define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -100,8 +100,8 @@ define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @odd_invalid_read2st64_f32_1
; SI-NOT: DS_READ2ST64_B32
; SI: S_ENDPGM
; SI-NOT: ds_read2st64_b32
; SI: s_endpgm
define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
@@ -117,11 +117,11 @@ define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2st64_f64_0_1
; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
@@ -136,11 +136,11 @@ define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
}
; SI-LABEL: @simple_read2st64_f64_1_2
; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
@@ -158,9 +158,9 @@ define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspac
; Alignment only
; SI-LABEL: @misaligned_read2st64_f64
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
; SI: S_ENDPGM
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
; SI: s_endpgm
define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
@@ -176,11 +176,11 @@ define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspac
; The maximum is not the usual 0xff because 0xff * 8 * 64 > 0xffff
; SI-LABEL: @simple_read2st64_f64_max_offset
; SI: DS_READ2ST64_B64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127
; SI: S_WAITCNT lgkmcnt(0)
; SI: V_ADD_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
; SI: S_ENDPGM
; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 256
@@ -196,11 +196,11 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a
}
; SI-LABEL: @simple_read2st64_f64_over_max_offset
; SI-NOT: DS_READ2ST64_B64
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: V_ADD_I32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
; SI: DS_READ_B64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: S_ENDPGM
; SI-NOT: ds_read2st64_b64
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
@@ -216,8 +216,8 @@ define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, dou
}
; SI-LABEL: @invalid_read2st64_f64_odd_offset
; SI-NOT: DS_READ2ST64_B64
; SI: S_ENDPGM
; SI-NOT: ds_read2st64_b64
; SI: s_endpgm
define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
@@ -236,9 +236,9 @@ define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double
; stride in elements, not bytes, is a multiple of 64.
; SI-LABEL: @byte_size_only_divisible_64_read2_f64
; SI-NOT: DS_READ2ST_B64
; SI: DS_READ2_B64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
; SI: S_ENDPGM
; SI-NOT: ds_read2st_b64
; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
; SI: s_endpgm
define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i

View File

@@ -5,10 +5,10 @@
; SI-LABEL: @simple_write2_one_val_f32
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
; SI: s_endpgm
define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
@@ -22,11 +22,11 @@ define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1
}
; SI-LABEL: @simple_write2_two_val_f32
; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
; SI: s_endpgm
define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
@@ -42,10 +42,10 @@ define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1
}
; SI-LABEL: @simple_write2_two_val_f32_volatile_0
; SI-NOT: DS_WRITE2_B32
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; SI: S_ENDPGM
; SI-NOT: ds_write2_b32
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; SI: s_endpgm
define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
@@ -61,10 +61,10 @@ define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float
}
; SI-LABEL: @simple_write2_two_val_f32_volatile_1
; SI-NOT: DS_WRITE2_B32
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}
; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; SI: S_ENDPGM
; SI-NOT: ds_write2_b32
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; SI: s_endpgm
define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
@@ -81,11 +81,11 @@ define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float
; 2 data subregisters from different super registers.
; SI-LABEL: @simple_write2_two_val_subreg2_mixed_f32
; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
; SI: BUFFER_LOAD_DWORDX2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
@@ -103,10 +103,10 @@ define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2
}
; SI-LABEL: @simple_write2_two_val_subreg2_f32
; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
@@ -122,10 +122,10 @@ define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x floa
}
; SI-LABEL: @simple_write2_two_val_subreg4_f32
; SI-DAG: BUFFER_LOAD_DWORDX4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
; SI: s_endpgm
define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr <4 x float> addrspace(1)* %in, i32 %x.i
@@ -141,11 +141,11 @@ define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x floa
}
; SI-LABEL: @simple_write2_two_val_max_offset_f32
; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
; SI: s_endpgm
define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
@@ -161,9 +161,9 @@ define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float
}
; SI-LABEL: @simple_write2_two_val_too_far_f32
; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
; SI: S_ENDPGM
; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}
; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
; SI: s_endpgm
define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
@@ -179,9 +179,9 @@ define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float add
}
; SI-LABEL: @simple_write2_two_val_f32_x2
; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: S_ENDPGM
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
@@ -209,9 +209,9 @@ define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspac
}
; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base
; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: S_ENDPGM
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
@@ -239,10 +239,10 @@ define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, f
}
; SI-LABEL: @write2_ptr_subreg_arg_two_val_f32
; SI-NOT: DS_WRITE2_B32
; SI: DS_WRITE_B32
; SI: DS_WRITE_B32
; SI: S_ENDPGM
; SI-NOT: ds_write2_b32
; SI: ds_write_b32
; SI: ds_write_b32
; SI: s_endpgm
define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
@@ -266,10 +266,10 @@ define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float add
}
; SI-LABEL: @simple_write2_one_val_f64
; SI: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]+:[0-9]+\]]],
; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: DS_WRITE2_B64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
; SI: s_endpgm
define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
@@ -283,11 +283,11 @@ define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace
}
; SI-LABEL: @misaligned_simple_write2_one_val_f64
; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0]
; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0]
; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0]
; SI: s_endpgm
define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
@@ -301,11 +301,11 @@ define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, doubl
}
; SI-LABEL: @simple_write2_two_val_f64
; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: DS_WRITE2_B64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
; SI: s_endpgm
define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
@@ -323,8 +323,8 @@ define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace
@foo = addrspace(3) global [4 x i32] zeroinitializer, align 4
; SI-LABEL: @store_constant_adjacent_offsets
; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
define void @store_constant_adjacent_offsets() {
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
@@ -332,9 +332,9 @@ define void @store_constant_adjacent_offsets() {
}
; SI-LABEL: @store_constant_disjoint_offsets
; SI-DAG: V_MOV_B32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
; SI-DAG: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: DS_WRITE2_B32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
define void @store_constant_disjoint_offsets() {
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
@@ -344,9 +344,9 @@ define void @store_constant_disjoint_offsets() {
@bar = addrspace(3) global [4 x i64] zeroinitializer, align 4
; SI-LABEL: @store_misaligned64_constant_offsets
; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
define void @store_misaligned64_constant_offsets() {
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
@@ -356,11 +356,11 @@ define void @store_misaligned64_constant_offsets() {
@bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4
; SI-LABEL: @store_misaligned64_constant_large_offsets
; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
; SI-DAG: DS_WRITE2_B32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI-DAG: DS_WRITE2_B32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: S_ENDPGM
; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: s_endpgm
define void @store_misaligned64_constant_large_offsets() {
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4

View File

@@ -5,10 +5,10 @@
; SI-LABEL: @simple_write2st64_one_val_f32_0_1
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 [M0]
; SI: s_endpgm
define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
@@ -22,11 +22,11 @@ define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float add
}
; SI-LABEL: @simple_write2st64_two_val_f32_2_5
; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 [M0]
; SI: s_endpgm
define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
@@ -43,11 +43,11 @@ define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float add
}
; SI-LABEL: @simple_write2st64_two_val_max_offset_f32
; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: DS_WRITE2ST64_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
; SI: s_endpgm
define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
@@ -63,11 +63,11 @@ define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, fl
}
; SI-LABEL: @simple_write2st64_two_val_max_offset_f64
; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
; SI-DAG: V_ADD_I32_e32 [[VPTR:v[0-9]+]],
; SI: DS_WRITE2ST64_B64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 [M0]
; SI: S_ENDPGM
; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
; SI-DAG: v_add_i32_e32 [[VPTR:v[0-9]+]],
; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 [M0]
; SI: s_endpgm
define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
@@ -84,9 +84,9 @@ define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, d
}
; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64
; SI-NOT: DS_WRITE2ST64_B64
; SI: DS_WRITE2_B64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
; SI: S_ENDPGM
; SI-NOT: ds_write2st64_b64
; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
; SI: s_endpgm
define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%in.gep = getelementptr double addrspace(1)* %in, i32 %x.i

View File

@@ -54,9 +54,9 @@ define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 a
}
; FUNC-LABEL: {{^}}sextload_global_i8_to_i64:
; SI: BUFFER_LOAD_SBYTE [[LOAD:v[0-9]+]],
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: BUFFER_STORE_DWORDX2
; SI: buffer_load_sbyte [[LOAD:v[0-9]+]],
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%a = load i8 addrspace(1)* %in, align 8
%ext = sext i8 %a to i64
@@ -65,9 +65,9 @@ define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)*
}
; FUNC-LABEL: {{^}}sextload_global_i16_to_i64:
; SI: BUFFER_LOAD_SSHORT [[LOAD:v[0-9]+]],
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: BUFFER_STORE_DWORDX2
; SI: buffer_load_sshort [[LOAD:v[0-9]+]],
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%a = load i16 addrspace(1)* %in, align 8
%ext = sext i16 %a to i64
@@ -76,9 +76,9 @@ define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
}
; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: BUFFER_STORE_DWORDX2
; SI: buffer_load_dword [[LOAD:v[0-9]+]],
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%a = load i32 addrspace(1)* %in, align 8
%ext = sext i32 %a to i64
@@ -87,10 +87,10 @@ define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)
}
; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:
; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
; SI-DAG: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: BUFFER_STORE_DWORDX2
; SI-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; SI-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]],
; SI: v_mov_b32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: buffer_store_dwordx2
define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%a = load i8 addrspace(1)* %in, align 8
%ext = zext i8 %a to i64
@@ -99,10 +99,10 @@ define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)*
}
; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
; SI-DAG: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: BUFFER_STORE_DWORDX2
; SI-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; SI-DAG: buffer_load_ushort [[LOAD:v[0-9]+]],
; SI: v_mov_b32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: buffer_store_dwordx2
define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%a = load i16 addrspace(1)* %in, align 8
%ext = zext i16 %a to i64
@@ -111,10 +111,10 @@ define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
}
; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
; SI-DAG: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: BUFFER_STORE_DWORDX2
; SI-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; SI-DAG: buffer_load_dword [[LOAD:v[0-9]+]],
; SI: v_mov_b32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: buffer_store_dwordx2
define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%a = load i32 addrspace(1)* %in, align 8
%ext = zext i32 %a to i64

View File

@@ -1,10 +1,10 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}extract_vector_elt_v2i16:
; SI: BUFFER_LOAD_USHORT
; SI: BUFFER_LOAD_USHORT
; SI: BUFFER_STORE_SHORT
; SI: BUFFER_STORE_SHORT
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_store_short
; SI: buffer_store_short
define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) nounwind {
%p0 = extractelement <2 x i16> %foo, i32 0
%p1 = extractelement <2 x i16> %foo, i32 1
@@ -15,10 +15,10 @@ define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) no
}
; FUNC-LABEL: {{^}}extract_vector_elt_v4i16:
; SI: BUFFER_LOAD_USHORT
; SI: BUFFER_LOAD_USHORT
; SI: BUFFER_STORE_SHORT
; SI: BUFFER_STORE_SHORT
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_store_short
; SI: buffer_store_short
define void @extract_vector_elt_v4i16(i16 addrspace(1)* %out, <4 x i16> %foo) nounwind {
%p0 = extractelement <4 x i16> %foo, i32 0
%p1 = extractelement <4 x i16> %foo, i32 2

View File

@@ -8,8 +8,8 @@ declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
; FUNC-LABEL: {{^}}v_fabs_f64:
; SI: V_AND_B32
; SI: S_ENDPGM
; SI: v_and_b32
; SI: s_endpgm
define void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%tidext = sext i32 %tid to i64
@@ -21,9 +21,9 @@ define void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
}
; FUNC-LABEL: {{^}}fabs_f64:
; SI: V_AND_B32
; SI-NOT: V_AND_B32
; SI: S_ENDPGM
; SI: v_and_b32
; SI-NOT: v_and_b32
; SI: s_endpgm
define void @fabs_f64(double addrspace(1)* %out, double %in) {
%fabs = call double @llvm.fabs.f64(double %in)
store double %fabs, double addrspace(1)* %out
@@ -31,9 +31,9 @@ define void @fabs_f64(double addrspace(1)* %out, double %in) {
}
; FUNC-LABEL: {{^}}fabs_v2f64:
; SI: V_AND_B32
; SI: V_AND_B32
; SI: S_ENDPGM
; SI: v_and_b32
; SI: v_and_b32
; SI: s_endpgm
define void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
%fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
store <2 x double> %fabs, <2 x double> addrspace(1)* %out
@@ -41,11 +41,11 @@ define void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
}
; FUNC-LABEL: {{^}}fabs_v4f64:
; SI: V_AND_B32
; SI: V_AND_B32
; SI: V_AND_B32
; SI: V_AND_B32
; SI: S_ENDPGM
; SI: v_and_b32
; SI: v_and_b32
; SI: v_and_b32
; SI: v_and_b32
; SI: s_endpgm
define void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
%fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
store <4 x double> %fabs, <4 x double> addrspace(1)* %out
@@ -53,10 +53,10 @@ define void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
}
; SI-LABEL: {{^}}fabs_fold_f64:
; SI: S_LOAD_DWORDX2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: AND
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
; SI: S_ENDPGM
; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: and
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
; SI: s_endpgm
define void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
%fabs = call double @llvm.fabs.f64(double %in0)
%fmul = fmul double %fabs, %in1
@@ -65,10 +65,10 @@ define void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1)
}
; SI-LABEL: {{^}}fabs_fn_fold_f64:
; SI: S_LOAD_DWORDX2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: AND
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
; SI: S_ENDPGM
; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: and
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
; SI: s_endpgm
define void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
%fabs = call double @fabs(double %in0)
%fmul = fmul double %fabs, %in1
@@ -77,8 +77,8 @@ define void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in
}
; FUNC-LABEL: {{^}}fabs_free_f64:
; SI: V_AND_B32
; SI: S_ENDPGM
; SI: v_and_b32
; SI: s_endpgm
define void @fabs_free_f64(double addrspace(1)* %out, i64 %in) {
%bc= bitcast i64 %in to double
%fabs = call double @llvm.fabs.f64(double %bc)
@@ -87,8 +87,8 @@ define void @fabs_free_f64(double addrspace(1)* %out, i64 %in) {
}
; FUNC-LABEL: {{^}}fabs_fn_free_f64:
; SI: V_AND_B32
; SI: S_ENDPGM
; SI: v_and_b32
; SI: s_endpgm
define void @fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
%bc= bitcast i64 %in to double
%fabs = call double @fabs(double %bc)

View File

@@ -10,7 +10,7 @@
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|
; SI: V_AND_B32
; SI: v_and_b32
define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
%bc= bitcast i32 %in to float
@@ -23,7 +23,7 @@ define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|
; SI: V_AND_B32
; SI: v_and_b32
define void @fabs_free(float addrspace(1)* %out, i32 %in) {
%bc= bitcast i32 %in to float
@@ -35,7 +35,7 @@ define void @fabs_free(float addrspace(1)* %out, i32 %in) {
; FUNC-LABEL: {{^}}fabs_f32:
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; SI: V_AND_B32
; SI: v_and_b32
define void @fabs_f32(float addrspace(1)* %out, float %in) {
%fabs = call float @llvm.fabs.f32(float %in)
store float %fabs, float addrspace(1)* %out
@@ -46,8 +46,8 @@ define void @fabs_f32(float addrspace(1)* %out, float %in) {
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; SI: V_AND_B32
; SI: V_AND_B32
; SI: v_and_b32
; SI: v_and_b32
define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
store <2 x float> %fabs, <2 x float> addrspace(1)* %out
@@ -60,10 +60,10 @@ define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; SI: V_AND_B32
; SI: V_AND_B32
; SI: V_AND_B32
; SI: V_AND_B32
; SI: v_and_b32
; SI: v_and_b32
; SI: v_and_b32
; SI: v_and_b32
define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
store <4 x float> %fabs, <4 x float> addrspace(1)* %out
@@ -71,9 +71,9 @@ define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
}
; SI-LABEL: {{^}}fabs_fn_fold:
; SI: S_LOAD_DWORD [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: AND
; SI: V_MUL_F32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: and
; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
%fabs = call float @fabs(float %in0)
%fmul = fmul float %fabs, %in1
@@ -82,9 +82,9 @@ define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
}
; SI-LABEL: {{^}}fabs_fold:
; SI: S_LOAD_DWORD [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: AND
; SI: V_MUL_F32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: and
; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
%fabs = call float @llvm.fabs.f32(float %in0)
%fmul = fmul float %fabs, %in1

View File

@@ -3,7 +3,7 @@
; FUNC-LABEL: {{^}}fadd_f32:
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
; SI: V_ADD_F32
; SI: v_add_f32
define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
%add = fadd float %a, %b
store float %add, float addrspace(1)* %out, align 4
@@ -13,8 +13,8 @@ define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
; FUNC-LABEL: {{^}}fadd_v2f32:
; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: v_add_f32
; SI: v_add_f32
define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
%add = fadd <2 x float> %a, %b
store <2 x float> %add, <2 x float> addrspace(1)* %out, align 8
@@ -26,10 +26,10 @@ define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x flo
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float> addrspace(1)* %in, align 16
@@ -48,14 +48,14 @@ define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)
; R600: ADD
; R600: ADD
; R600: ADD
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: V_ADD_F32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
define void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) {
%add = fadd <8 x float> %a, %b
store <8 x float> %add, <8 x float> addrspace(1)* %out, align 32

View File

@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}fadd_f64:
; CHECK: V_ADD_F64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}
; CHECK: v_add_f64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}
define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {

View File

@@ -9,7 +9,7 @@ declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone
declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone
; FUNC-LABEL: {{^}}fceil_f32:
; SI: V_CEIL_F32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: CEIL {{\*? *}}[[RESULT]]
define void @fceil_f32(float addrspace(1)* %out, float %x) {
@@ -19,8 +19,8 @@ define void @fceil_f32(float addrspace(1)* %out, float %x) {
}
; FUNC-LABEL: {{^}}fceil_v2f32:
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
@@ -31,9 +31,9 @@ define void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
}
; FUNC-LABEL: {{^}}fceil_v3f32:
; FIXME-SI: V_CEIL_F32_e32
; FIXME-SI: V_CEIL_F32_e32
; FIXME-SI: V_CEIL_F32_e32
; FIXME-SI: v_ceil_f32_e32
; FIXME-SI: v_ceil_f32_e32
; FIXME-SI: v_ceil_f32_e32
; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
@@ -47,10 +47,10 @@ define void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
}
; FUNC-LABEL: {{^}}fceil_v4f32:
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
@@ -63,14 +63,14 @@ define void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
}
; FUNC-LABEL: {{^}}fceil_v8f32:
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
@@ -88,22 +88,22 @@ define void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
}
; FUNC-LABEL: {{^}}fceil_v16f32:
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: V_CEIL_F32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}}

View File

@@ -9,25 +9,25 @@ declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
; FUNC-LABEL: {{^}}fceil_f64:
; CI: V_CEIL_F64_e32
; SI: S_BFE_U32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: S_LSHR_B64
; SI: S_NOT_B64
; SI: S_AND_B64
; SI-DAG: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI-DAG: CMP_LT_I32
; SI: CNDMASK_B32
; SI: CNDMASK_B32
; SI: CMP_GT_I32
; SI: CNDMASK_B32
; SI: CNDMASK_B32
; SI: CMP_GT_F64
; SI: CNDMASK_B32
; SI: CMP_NE_I32
; SI: CNDMASK_B32
; SI: CNDMASK_B32
; SI: V_ADD_F64
; CI: v_ceil_f64_e32
; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: s_lshr_b64
; SI: s_not_b64
; SI: s_and_b64
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI-DAG: cmp_lt_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: cmp_gt_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: cmp_gt_f64
; SI: cndmask_b32
; SI: cmp_ne_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: v_add_f64
define void @fceil_f64(double addrspace(1)* %out, double %x) {
%y = call double @llvm.ceil.f64(double %x) nounwind readnone
store double %y, double addrspace(1)* %out
@@ -35,8 +35,8 @@ define void @fceil_f64(double addrspace(1)* %out, double %x) {
}
; FUNC-LABEL: {{^}}fceil_v2f64:
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
%y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone
store <2 x double> %y, <2 x double> addrspace(1)* %out
@@ -44,9 +44,9 @@ define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
}
; FIXME-FUNC-LABEL: {{^}}fceil_v3f64:
; FIXME-CI: V_CEIL_F64_e32
; FIXME-CI: V_CEIL_F64_e32
; FIXME-CI: V_CEIL_F64_e32
; FIXME-CI: v_ceil_f64_e32
; FIXME-CI: v_ceil_f64_e32
; FIXME-CI: v_ceil_f64_e32
; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone
; store <3 x double> %y, <3 x double> addrspace(1)* %out
@@ -54,10 +54,10 @@ define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
; }
; FUNC-LABEL: {{^}}fceil_v4f64:
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
%y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
store <4 x double> %y, <4 x double> addrspace(1)* %out
@@ -65,14 +65,14 @@ define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
}
; FUNC-LABEL: {{^}}fceil_v8f64:
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
%y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone
store <8 x double> %y, <8 x double> addrspace(1)* %out
@@ -80,22 +80,22 @@ define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
}
; FUNC-LABEL: {{^}}fceil_v16f64:
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: V_CEIL_F64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
; CI: v_ceil_f64_e32
define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
%y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
store <16 x double> %y, <16 x double> addrspace(1)* %out

View File

@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}flt_f64:
; CHECK: V_CMP_LT_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
; CHECK: v_cmp_lt_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
@@ -14,7 +14,7 @@ define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
}
; CHECK: {{^}}fle_f64:
; CHECK: V_CMP_LE_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
; CHECK: v_cmp_le_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
@@ -27,7 +27,7 @@ define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
}
; CHECK: {{^}}fgt_f64:
; CHECK: V_CMP_GT_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
; CHECK: v_cmp_gt_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
@@ -40,7 +40,7 @@ define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
}
; CHECK: {{^}}fge_f64:
; CHECK: V_CMP_GE_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
; CHECK: v_cmp_ge_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
@@ -53,7 +53,7 @@ define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
}
; CHECK: {{^}}fne_f64:
; CHECK: V_CMP_NEQ_F64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
; CHECK: v_cmp_neq_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
@@ -66,7 +66,7 @@ define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
}
; CHECK: {{^}}feq_f64:
; CHECK: V_CMP_EQ_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
; CHECK: v_cmp_eq_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {

View File

@@ -1,8 +1,8 @@
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}fconst_f64:
; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0x40140000
; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0
; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0x40140000
; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0
define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%r1 = load double addrspace(1)* %in

View File

@@ -8,14 +8,14 @@ declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind read
; Try to identify arg based on higher address.
; FUNC-LABEL: {{^}}test_copysign_f32:
; SI: S_LOAD_DWORD [[SMAG:s[0-9]+]], {{.*}} 0xb
; SI: S_LOAD_DWORD [[SSIGN:s[0-9]+]], {{.*}} 0xc
; SI-DAG: V_MOV_B32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
; SI-DAG: V_MOV_B32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0xb
; SI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0xc
; SI-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
; SI-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; EG: BFI_INT
define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind {
@@ -25,7 +25,7 @@ define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign
}
; FUNC-LABEL: {{^}}test_copysign_v2f32:
; SI: S_ENDPGM
; SI: s_endpgm
; EG: BFI_INT
; EG: BFI_INT
@@ -36,7 +36,7 @@ define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %ma
}
; FUNC-LABEL: {{^}}test_copysign_v4f32:
; SI: S_ENDPGM
; SI: s_endpgm
; EG: BFI_INT
; EG: BFI_INT

View File

@@ -5,15 +5,15 @@ declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind r
declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind readnone
; FUNC-LABEL: {{^}}test_copysign_f64:
; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: V_MOV_B32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
; SI-DAG: V_MOV_B32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
; SI: V_BFI_B32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
; SI: V_MOV_B32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
; SI: S_ENDPGM
; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
; SI-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
; SI: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
; SI: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
; SI: s_endpgm
define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind {
%result = call double @llvm.copysign.f64(double %mag, double %sign)
store double %result, double addrspace(1)* %out, align 8
@@ -21,7 +21,7 @@ define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %s
}
; FUNC-LABEL: {{^}}test_copysign_v2f64:
; SI: S_ENDPGM
; SI: s_endpgm
define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind {
%result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8
@@ -29,7 +29,7 @@ define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %
}
; FUNC-LABEL: {{^}}test_copysign_v4f64:
; SI: S_ENDPGM
; SI: s_endpgm
define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind {
%result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8

View File

@@ -11,8 +11,8 @@
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
; SI-DAG: V_RCP_F32
; SI-DAG: V_MUL_F32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fdiv float %a, %b
@@ -28,10 +28,10 @@ entry:
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
; SI-DAG: V_RCP_F32
; SI-DAG: V_MUL_F32
; SI-DAG: V_RCP_F32
; SI-DAG: V_MUL_F32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
%0 = fdiv <2 x float> %a, %b
@@ -49,14 +49,14 @@ entry:
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; SI-DAG: V_RCP_F32
; SI-DAG: V_MUL_F32
; SI-DAG: V_RCP_F32
; SI-DAG: V_MUL_F32
; SI-DAG: V_RCP_F32
; SI-DAG: V_MUL_F32
; SI-DAG: V_RCP_F32
; SI-DAG: V_MUL_F32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float> addrspace(1) * %in

View File

@@ -1,8 +1,8 @@
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}fdiv_f64:
; CHECK: V_RCP_F64_e32 {{v\[[0-9]+:[0-9]+\]}}
; CHECK: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
; CHECK: v_rcp_f64_e32 {{v\[[0-9]+:[0-9]+\]}}
; CHECK: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {

View File

@@ -9,26 +9,26 @@ declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
; FUNC-LABEL: {{^}}ffloor_f64:
; CI: V_FLOOR_F64_e32
; CI: v_floor_f64_e32
; SI: S_BFE_U32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: S_LSHR_B64
; SI: S_NOT_B64
; SI: S_AND_B64
; SI-DAG: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI-DAG: CMP_LT_I32
; SI: CNDMASK_B32
; SI: CNDMASK_B32
; SI: CMP_GT_I32
; SI: CNDMASK_B32
; SI: CNDMASK_B32
; SI: CMP_LT_F64
; SI: CNDMASK_B32
; SI: CMP_NE_I32
; SI: CNDMASK_B32
; SI: CNDMASK_B32
; SI: V_ADD_F64
; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: s_lshr_b64
; SI: s_not_b64
; SI: s_and_b64
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI-DAG: cmp_lt_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: cmp_gt_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: cmp_lt_f64
; SI: cndmask_b32
; SI: cmp_ne_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: v_add_f64
define void @ffloor_f64(double addrspace(1)* %out, double %x) {
%y = call double @llvm.floor.f64(double %x) nounwind readnone
store double %y, double addrspace(1)* %out
@@ -36,8 +36,8 @@ define void @ffloor_f64(double addrspace(1)* %out, double %x) {
}
; FUNC-LABEL: {{^}}ffloor_v2f64:
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
%y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
store <2 x double> %y, <2 x double> addrspace(1)* %out
@@ -45,9 +45,9 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
}
; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64:
; FIXME-CI: V_FLOOR_F64_e32
; FIXME-CI: V_FLOOR_F64_e32
; FIXME-CI: V_FLOOR_F64_e32
; FIXME-CI: v_floor_f64_e32
; FIXME-CI: v_floor_f64_e32
; FIXME-CI: v_floor_f64_e32
; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
; %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
; store <3 x double> %y, <3 x double> addrspace(1)* %out
@@ -55,10 +55,10 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
; }
; FUNC-LABEL: {{^}}ffloor_v4f64:
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
%y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
store <4 x double> %y, <4 x double> addrspace(1)* %out
@@ -66,14 +66,14 @@ define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
}
; FUNC-LABEL: {{^}}ffloor_v8f64:
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
%y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
store <8 x double> %y, <8 x double> addrspace(1)* %out
@@ -81,22 +81,22 @@ define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
}
; FUNC-LABEL: {{^}}ffloor_v16f64:
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: V_FLOOR_F64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
%y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
store <16 x double> %y, <16 x double> addrspace(1)* %out

View File

@@ -6,8 +6,8 @@
; CHECK-LABEL: {{^}}branch_use_flat_i32:
; CHECK: FLAT_STORE_DWORD {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH]
; CHECK: S_ENDPGM
; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH]
; CHECK: s_endpgm
define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
entry:
%cmp = icmp ne i32 %c, 0
@@ -35,10 +35,10 @@ end:
; remove generic pointers.
; CHECK-LABEL: {{^}}store_flat_i32:
; CHECK: V_MOV_B32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}}
; CHECK: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}}
; CHECK: V_MOV_B32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}}
; CHECK: FLAT_STORE_DWORD v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
; CHECK: v_mov_b32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}}
; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}}
; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}}
; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
%fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
store i32 %x, i32 addrspace(4)* %fptr, align 4
@@ -46,7 +46,7 @@ define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
}
; CHECK-LABEL: {{^}}store_flat_i64:
; CHECK: FLAT_STORE_DWORDX2
; CHECK: flat_store_dwordx2
define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
%fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
store i64 %x, i64 addrspace(4)* %fptr, align 8
@@ -54,7 +54,7 @@ define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
}
; CHECK-LABEL: {{^}}store_flat_v4i32:
; CHECK: FLAT_STORE_DWORDX4
; CHECK: flat_store_dwordx4
define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
%fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
store <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16
@@ -62,7 +62,7 @@ define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
}
; CHECK-LABEL: {{^}}store_flat_trunc_i16:
; CHECK: FLAT_STORE_SHORT
; CHECK: flat_store_short
define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
%y = trunc i32 %x to i16
@@ -71,7 +71,7 @@ define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
}
; CHECK-LABEL: {{^}}store_flat_trunc_i8:
; CHECK: FLAT_STORE_BYTE
; CHECK: flat_store_byte
define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
%y = trunc i32 %x to i8
@@ -82,7 +82,7 @@ define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
; CHECK-LABEL @load_flat_i32:
; CHECK: FLAT_LOAD_DWORD
; CHECK: flat_load_dword
define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
%fload = load i32 addrspace(4)* %fptr, align 4
@@ -91,7 +91,7 @@ define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noa
}
; CHECK-LABEL @load_flat_i64:
; CHECK: FLAT_LOAD_DWORDX2
; CHECK: flat_load_dwordx2
define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
%fload = load i64 addrspace(4)* %fptr, align 4
@@ -100,7 +100,7 @@ define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
}
; CHECK-LABEL @load_flat_v4i32:
; CHECK: FLAT_LOAD_DWORDX4
; CHECK: flat_load_dwordx4
define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
%fload = load <4 x i32> addrspace(4)* %fptr, align 4
@@ -109,7 +109,7 @@ define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> add
}
; CHECK-LABEL @sextload_flat_i8:
; CHECK: FLAT_LOAD_SBYTE
; CHECK: flat_load_sbyte
define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
%fload = load i8 addrspace(4)* %fptr, align 4
@@ -119,7 +119,7 @@ define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n
}
; CHECK-LABEL @zextload_flat_i8:
; CHECK: FLAT_LOAD_UBYTE
; CHECK: flat_load_ubyte
define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
%fload = load i8 addrspace(4)* %fptr, align 4
@@ -129,7 +129,7 @@ define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n
}
; CHECK-LABEL @sextload_flat_i16:
; CHECK: FLAT_LOAD_SSHORT
; CHECK: flat_load_sshort
define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
%fload = load i16 addrspace(4)* %fptr, align 4
@@ -139,7 +139,7 @@ define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)*
}
; CHECK-LABEL @zextload_flat_i16:
; CHECK: FLAT_LOAD_USHORT
; CHECK: flat_load_ushort
define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
%fload = load i16 addrspace(4)* %fptr, align 4
@@ -155,12 +155,12 @@ define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)*
; Check for prologue initializing special SGPRs pointing to scratch.
; CHECK-LABEL: {{^}}store_flat_scratch:
; CHECK: S_MOVK_I32 flat_scratch_lo, 0
; CHECK-NO-PROMOTE: S_MOVK_I32 flat_scratch_hi, 40
; CHECK-PROMOTE: S_MOVK_I32 flat_scratch_hi, 0
; CHECK: FLAT_STORE_DWORD
; CHECK: S_BARRIER
; CHECK: FLAT_LOAD_DWORD
; CHECK: s_movk_i32 flat_scratch_lo, 0
; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 40
; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0
; CHECK: flat_store_dword
; CHECK: s_barrier
; CHECK: flat_load_dword
define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
%alloca = alloca i32, i32 9, align 4
%x = call i32 @llvm.r600.read.tidig.x() #3

View File

@@ -6,7 +6,7 @@ declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) n
; FUNC-LABEL: {{^}}fma_f64:
; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2, double addrspace(1)* %in3) {
%r0 = load double addrspace(1)* %in1
@@ -18,8 +18,8 @@ define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
}
; FUNC-LABEL: {{^}}fma_v2f64:
; SI: V_FMA_F64
; SI: V_FMA_F64
; SI: v_fma_f64
; SI: v_fma_f64
define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
<2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
%r0 = load <2 x double> addrspace(1)* %in1
@@ -31,10 +31,10 @@ define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1
}
; FUNC-LABEL: {{^}}fma_v4f64:
; SI: V_FMA_F64
; SI: V_FMA_F64
; SI: V_FMA_F64
; SI: V_FMA_F64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
<4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
%r0 = load <4 x double> addrspace(1)* %in1

View File

@@ -6,7 +6,7 @@ declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounw
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; FUNC-LABEL: {{^}}fma_f32:
; SI: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}},
; EG: FMA {{\*? *}}[[RES]]
@@ -21,8 +21,8 @@ define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
}
; FUNC-LABEL: {{^}}fma_v2f32:
; SI: V_FMA_F32
; SI: V_FMA_F32
; SI: v_fma_f32
; SI: v_fma_f32
; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]]].[[CHLO:[XYZW]]][[CHHI:[XYZW]]], {{T[0-9]\.[XYZW]}},
; EG-DAG: FMA {{\*? *}}[[RES]].[[CHLO]]
@@ -38,10 +38,10 @@ define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)*
}
; FUNC-LABEL: {{^}}fma_v4f32:
; SI: V_FMA_F32
; SI: V_FMA_F32
; SI: V_FMA_F32
; SI: V_FMA_F32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_fma_f32
; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]]].{{[XYZW][XYZW][XYZW][XYZW]}}, {{T[0-9]\.[XYZW]}},
; EG-DAG: FMA {{\*? *}}[[RES]].X

View File

@@ -7,7 +7,7 @@ declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>) #0
declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>) #0
; FUNC-LABEL: @test_fmax_f64
; SI: V_MAX_F64
; SI: v_max_f64
define void @test_fmax_f64(double addrspace(1)* %out, double %a, double %b) nounwind {
%val = call double @llvm.maxnum.f64(double %a, double %b) #0
store double %val, double addrspace(1)* %out, align 8
@@ -15,8 +15,8 @@ define void @test_fmax_f64(double addrspace(1)* %out, double %a, double %b) noun
}
; FUNC-LABEL: @test_fmax_v2f64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: v_max_f64
; SI: v_max_f64
define void @test_fmax_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b) #0
store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16
@@ -24,10 +24,10 @@ define void @test_fmax_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <
}
; FUNC-LABEL: @test_fmax_v4f64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
define void @test_fmax_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%val = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %a, <4 x double> %b) #0
store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32
@@ -35,14 +35,14 @@ define void @test_fmax_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <
}
; FUNC-LABEL: @test_fmax_v8f64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
define void @test_fmax_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%val = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %a, <8 x double> %b) #0
store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64
@@ -50,22 +50,22 @@ define void @test_fmax_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <
}
; FUNC-LABEL: @test_fmax_v16f64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: V_MAX_F64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
; SI: v_max_f64
define void @test_fmax_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%val = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %a, <16 x double> %b) #0
store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128

View File

@@ -9,7 +9,7 @@ declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #0
declare double @llvm.maxnum.f64(double, double)
; FUNC-LABEL: @test_fmax_f32
; SI: V_MAX_F32_e32
; SI: v_max_f32_e32
define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float %b) #0
store float %val, float addrspace(1)* %out, align 4
@@ -17,8 +17,8 @@ define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwin
}
; FUNC-LABEL: @test_fmax_v2f32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
@@ -26,10 +26,10 @@ define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2
}
; FUNC-LABEL: @test_fmax_v4f32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
@@ -37,14 +37,14 @@ define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4
}
; FUNC-LABEL: @test_fmax_v8f32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
@@ -52,22 +52,22 @@ define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8
}
; FUNC-LABEL: @test_fmax_v16f32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: V_MAX_F32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
@@ -75,9 +75,9 @@ define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a,
}
; FUNC-LABEL: @constant_fold_fmax_f32
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 2.0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -85,9 +85,9 @@ define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmax_f32_nan_nan
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -95,9 +95,9 @@ define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmax_f32_val_nan
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -105,9 +105,9 @@ define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmax_f32_nan_val
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -115,9 +115,9 @@ define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmax_f32_p0_p0
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -125,9 +125,9 @@ define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmax_f32_p0_n0
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -135,9 +135,9 @@ define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmax_f32_n0_p0
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -145,9 +145,9 @@ define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmax_f32_n0_n0
; SI-NOT: V_MAX_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -155,7 +155,7 @@ define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @fmax_var_immediate_f32
; SI: V_MAX_F32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -163,7 +163,7 @@ define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind
}
; FUNC-LABEL: @fmax_immediate_var_f32
; SI: V_MAX_F32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float 2.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
@@ -171,8 +171,8 @@ define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind
}
; FUNC-LABEL: @fmax_var_literal_f32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: V_MAX_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float 99.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -180,8 +180,8 @@ define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
}
; FUNC-LABEL: @fmax_literal_var_f32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: V_MAX_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float 99.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4

View File

@@ -7,7 +7,7 @@ declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>) #0
declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>) #0
; FUNC-LABEL: @test_fmin_f64
; SI: V_MIN_F64
; SI: v_min_f64
define void @test_fmin_f64(double addrspace(1)* %out, double %a, double %b) nounwind {
%val = call double @llvm.minnum.f64(double %a, double %b) #0
store double %val, double addrspace(1)* %out, align 8
@@ -15,8 +15,8 @@ define void @test_fmin_f64(double addrspace(1)* %out, double %a, double %b) noun
}
; FUNC-LABEL: @test_fmin_v2f64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: v_min_f64
; SI: v_min_f64
define void @test_fmin_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%val = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) #0
store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16
@@ -24,10 +24,10 @@ define void @test_fmin_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <
}
; FUNC-LABEL: @test_fmin_v4f64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
define void @test_fmin_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%val = call <4 x double> @llvm.minnum.v4f64(<4 x double> %a, <4 x double> %b) #0
store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32
@@ -35,14 +35,14 @@ define void @test_fmin_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <
}
; FUNC-LABEL: @test_fmin_v8f64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
define void @test_fmin_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%val = call <8 x double> @llvm.minnum.v8f64(<8 x double> %a, <8 x double> %b) #0
store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64
@@ -50,22 +50,22 @@ define void @test_fmin_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <
}
; FUNC-LABEL: @test_fmin_v16f64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: V_MIN_F64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
; SI: v_min_f64
define void @test_fmin_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%val = call <16 x double> @llvm.minnum.v16f64(<16 x double> %a, <16 x double> %b) #0
store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128

View File

@@ -7,7 +7,7 @@ declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #0
declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #0
; FUNC-LABEL: @test_fmin_f32
; SI: V_MIN_F32_e32
; SI: v_min_f32_e32
define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
%val = call float @llvm.minnum.f32(float %a, float %b) #0
store float %val, float addrspace(1)* %out, align 4
@@ -15,8 +15,8 @@ define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwin
}
; FUNC-LABEL: @test_fmin_v2f32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b) #0
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
@@ -24,10 +24,10 @@ define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2
}
; FUNC-LABEL: @test_fmin_v4f32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) #0
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
@@ -35,14 +35,14 @@ define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4
}
; FUNC-LABEL: @test_fmin_v8f32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b) #0
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
@@ -50,22 +50,22 @@ define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8
}
; FUNC-LABEL: @test_fmin_v16f32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: V_MIN_F32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b) #0
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
@@ -73,9 +73,9 @@ define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a,
}
; FUNC-LABEL: @constant_fold_fmin_f32
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 1.0, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -83,9 +83,9 @@ define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmin_f32_nan_nan
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -93,9 +93,9 @@ define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmin_f32_val_nan
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -103,9 +103,9 @@ define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmin_f32_nan_val
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -113,9 +113,9 @@ define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmin_f32_p0_p0
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -123,9 +123,9 @@ define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmin_f32_p0_n0
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -133,9 +133,9 @@ define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmin_f32_n0_p0
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float -0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -143,9 +143,9 @@ define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @constant_fold_fmin_f32_n0_n0
; SI-NOT: V_MIN_F32_e32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: BUFFER_STORE_DWORD [[REG]]
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float -0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -153,7 +153,7 @@ define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: @fmin_var_immediate_f32
; SI: V_MIN_F32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float %a, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -161,7 +161,7 @@ define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind
}
; FUNC-LABEL: @fmin_immediate_var_f32
; SI: V_MIN_F32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float 2.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
@@ -169,8 +169,8 @@ define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind
}
; FUNC-LABEL: @fmin_var_literal_f32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: V_MIN_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float %a, float 99.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -178,8 +178,8 @@ define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
}
; FUNC-LABEL: @fmin_literal_var_f32
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: V_MIN_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float 99.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4

View File

@@ -5,7 +5,7 @@
; FUNC-LABEL: {{^}}fmul_f32:
; R600: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
; SI: V_MUL_F32
; SI: v_mul_f32
define void @fmul_f32(float addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fmul float %a, %b
@@ -21,8 +21,8 @@ declare void @llvm.AMDGPU.store.output(float, i32)
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
; SI: V_MUL_F32
; SI: V_MUL_F32
; SI: v_mul_f32
; SI: v_mul_f32
define void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
%0 = fmul <2 x float> %a, %b
@@ -36,10 +36,10 @@ entry:
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; SI: V_MUL_F32
; SI: V_MUL_F32
; SI: V_MUL_F32
; SI: V_MUL_F32
; SI: v_mul_f32
; SI: v_mul_f32
; SI: v_mul_f32
; SI: v_mul_f32
define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float> addrspace(1) * %in
@@ -50,9 +50,9 @@ define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)
}
; FUNC-LABEL: {{^}}test_mul_2_k:
; SI: V_MUL_F32
; SI-NOT: V_MUL_F32
; SI: S_ENDPGM
; SI: v_mul_f32
; SI-NOT: v_mul_f32
; SI: s_endpgm
define void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
%y = fmul float %x, 2.0
%z = fmul float %y, 3.0
@@ -61,10 +61,10 @@ define void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
}
; FUNC-LABEL: {{^}}test_mul_2_k_inv:
; SI: V_MUL_F32
; SI-NOT: V_MUL_F32
; SI-NOT: V_MAD_F32
; SI: S_ENDPGM
; SI: v_mul_f32
; SI-NOT: v_mul_f32
; SI-NOT: v_mad_f32
; SI: s_endpgm
define void @test_mul_2_k_inv(float addrspace(1)* %out, float %x) #0 {
%y = fmul float %x, 3.0
%z = fmul float %y, 2.0

View File

@@ -1,7 +1,7 @@
; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
; FUNC-LABEL: {{^}}fmul_f64:
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
@@ -12,8 +12,8 @@ define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
}
; FUNC-LABEL: {{^}}fmul_v2f64:
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
<2 x double> addrspace(1)* %in2) {
%r0 = load <2 x double> addrspace(1)* %in1
@@ -24,10 +24,10 @@ define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(
}
; FUNC-LABEL: {{^}}fmul_v4f64:
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
<4 x double> addrspace(1)* %in2) {
%r0 = load <4 x double> addrspace(1)* %in1

View File

@@ -6,7 +6,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone
; CHECK-LABEL: {{^}}fmuladd_f32:
; CHECK: V_MAD_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
; CHECK: v_mad_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2, float addrspace(1)* %in3) {
@@ -19,7 +19,7 @@ define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
}
; CHECK-LABEL: {{^}}fmuladd_f64:
; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; CHECK: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2, double addrspace(1)* %in3) {
@@ -32,10 +32,10 @@ define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
}
; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
@@ -51,10 +51,10 @@ define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %
}
; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
@@ -70,10 +70,10 @@ define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %
}
; CHECK-LABEL: {{^}}fadd_a_a_b_f32:
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fadd_a_a_b_f32(float addrspace(1)* %out,
float addrspace(1)* %in1,
float addrspace(1)* %in2) {
@@ -92,10 +92,10 @@ define void @fadd_a_a_b_f32(float addrspace(1)* %out,
}
; CHECK-LABEL: {{^}}fadd_b_a_a_f32:
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fadd_b_a_a_f32(float addrspace(1)* %out,
float addrspace(1)* %in1,
float addrspace(1)* %in2) {
@@ -114,10 +114,10 @@ define void @fadd_b_a_a_f32(float addrspace(1)* %out,
}
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
@@ -134,10 +134,10 @@ define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
@@ -156,10 +156,10 @@ define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspa
; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
@@ -178,10 +178,10 @@ define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1
; CHECK-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
; CHECK: BUFFER_STORE_DWORD [[RESULT]]
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float addrspace(1)* %out, i32 %tid

View File

@@ -4,9 +4,9 @@
; into 2 modifiers, although theoretically that should work.
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f64:
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff
; SI: V_AND_B32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]]
; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}}
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff
; SI: v_and_b32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]]
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}}
define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
@@ -26,7 +26,7 @@ define void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, double addrspace(1)
}
; FUNC-LABEL: {{^}}fneg_fabs_fmul_f64:
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|
define void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y) {
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
@@ -45,8 +45,8 @@ define void @fneg_fabs_free_f64(double addrspace(1)* %out, i64 %in) {
}
; FUNC-LABEL: {{^}}fneg_fabs_fn_free_f64:
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
%bc = bitcast i64 %in to double
%fabs = call double @fabs(double %bc)
@@ -56,12 +56,12 @@ define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
}
; FUNC-LABEL: {{^}}fneg_fabs_f64:
; SI: S_LOAD_DWORDX2
; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-DAG: V_OR_B32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]
; SI-DAG: V_MOV_B32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}
; SI: s_load_dwordx2
; SI: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]
; SI-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}
define void @fneg_fabs_f64(double addrspace(1)* %out, double %in) {
%fabs = call double @llvm.fabs.f64(double %in)
%fsub = fsub double -0.000000e+00, %fabs
@@ -70,10 +70,10 @@ define void @fneg_fabs_f64(double addrspace(1)* %out, double %in) {
}
; FUNC-LABEL: {{^}}fneg_fabs_v2f64:
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-NOT: 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
%fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
%fsub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %fabs
@@ -82,12 +82,12 @@ define void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in)
}
; FUNC-LABEL: {{^}}fneg_fabs_v4f64:
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-NOT: 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
%fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
%fsub = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %fabs

View File

@@ -2,8 +2,8 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
; SI-NOT: AND
; SI: V_SUB_F32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
; SI-NOT: and
; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
define void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
%fabs = call float @llvm.fabs.f32(float %x)
%fsub = fsub float -0.000000e+00, %fabs
@@ -13,9 +13,9 @@ define void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
}
; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32:
; SI-NOT: AND
; SI: V_MUL_F32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
; SI-NOT: AND
; SI-NOT: and
; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
; SI-NOT: and
define void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
%fabs = call float @llvm.fabs.f32(float %x)
%fsub = fsub float -0.000000e+00, %fabs
@@ -33,8 +33,8 @@ define void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
; R600: |PV.{{[XYZW]}}|
; R600: -PV
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) {
%bc = bitcast i32 %in to float
%fabs = call float @llvm.fabs.f32(float %bc)
@@ -48,8 +48,8 @@ define void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) {
; R600: |PV.{{[XYZW]}}|
; R600: -PV
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) {
%bc = bitcast i32 %in to float
%fabs = call float @fabs(float %bc)
@@ -59,8 +59,8 @@ define void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) {
}
; FUNC-LABEL: {{^}}fneg_fabs_f32:
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_f32(float addrspace(1)* %out, float %in) {
%fabs = call float @llvm.fabs.f32(float %in)
%fsub = fsub float -0.000000e+00, %fabs
@@ -69,7 +69,7 @@ define void @fneg_fabs_f32(float addrspace(1)* %out, float %in) {
}
; FUNC-LABEL: {{^}}v_fneg_fabs_f32:
; SI: V_OR_B32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%val = load float addrspace(1)* %in, align 4
%fabs = call float @llvm.fabs.f32(float %val)
@@ -85,10 +85,10 @@ define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in)
; R600: -PV
; FIXME: SGPR should be used directly for first src operand.
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-NOT: 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
%fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
@@ -98,12 +98,12 @@ define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
; FIXME: SGPR should be used directly for first src operand.
; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-NOT: 0x80000000
; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
define void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
%fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs

View File

@@ -1,7 +1,7 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_f64:
; SI: V_XOR_B32
; SI: v_xor_b32
define void @fneg_f64(double addrspace(1)* %out, double %in) {
%fneg = fsub double -0.000000e+00, %in
store double %fneg, double addrspace(1)* %out
@@ -9,8 +9,8 @@ define void @fneg_f64(double addrspace(1)* %out, double %in) {
}
; FUNC-LABEL: {{^}}fneg_v2f64:
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: v_xor_b32
; SI: v_xor_b32
define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
%fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
store <2 x double> %fneg, <2 x double> addrspace(1)* %out
@@ -23,10 +23,10 @@ define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double>
; R600: -PV
; R600: -PV
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: v_xor_b32
; SI: v_xor_b32
; SI: v_xor_b32
; SI: v_xor_b32
define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
%fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
store <4 x double> %fneg, <4 x double> addrspace(1)* %out
@@ -39,7 +39,7 @@ define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double>
; FUNC-LABEL: {{^}}fneg_free_f64:
; FIXME: Unnecessary copy to VGPRs
; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
%bc = bitcast i64 %in to double
%fsub = fsub double 0.0, %bc
@@ -48,9 +48,9 @@ define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
}
; SI-LABEL: {{^}}fneg_fold_f64:
; SI: S_LOAD_DWORDX2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: XOR
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
; SI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: xor
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
%fsub = fsub double -0.0, %in
%fmul = fmul double %fsub, %in

View File

@@ -4,7 +4,7 @@
; FUNC-LABEL: {{^}}fneg_f32:
; R600: -PV
; SI: V_XOR_B32
; SI: v_xor_b32
define void @fneg_f32(float addrspace(1)* %out, float %in) {
%fneg = fsub float -0.000000e+00, %in
store float %fneg, float addrspace(1)* %out
@@ -15,8 +15,8 @@ define void @fneg_f32(float addrspace(1)* %out, float %in) {
; R600: -PV
; R600: -PV
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: v_xor_b32
; SI: v_xor_b32
define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
%fneg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
store <2 x float> %fneg, <2 x float> addrspace(1)* %out
@@ -29,10 +29,10 @@ define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %i
; R600: -PV
; R600: -PV
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: V_XOR_B32
; SI: v_xor_b32
; SI: v_xor_b32
; SI: v_xor_b32
; SI: v_xor_b32
define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
%fneg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
store <4 x float> %fneg, <4 x float> addrspace(1)* %out
@@ -47,8 +47,8 @@ define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %i
; R600-NOT: XOR
; R600: -KC0[2].Z
; XXX: We could use V_ADD_F32_e64 with the negate bit here instead.
; SI: V_SUB_F32_e64 v{{[0-9]}}, 0.0, s{{[0-9]+$}}
; XXX: We could use v_add_f32_e64 with the negate bit here instead.
; SI: v_sub_f32_e64 v{{[0-9]}}, 0.0, s{{[0-9]+$}}
define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
%bc = bitcast i32 %in to float
%fsub = fsub float 0.0, %bc
@@ -57,9 +57,9 @@ define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
}
; FUNC-LABEL: {{^}}fneg_fold_f32:
; SI: S_LOAD_DWORD [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: XOR
; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: xor
; SI: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
%fsub = fsub float -0.0, %in
%fmul = fmul float %fsub, %in

View File

@@ -4,9 +4,9 @@ declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
; SI-LABEL: {{^}}test_convert_fp16_to_fp32:
; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
; SI: V_CVT_F32_F16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: buffer_load_ushort [[VAL:v[0-9]+]]
; SI: v_cvt_f32_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]]
define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%val = load i16 addrspace(1)* %in, align 2
%cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
@@ -16,10 +16,10 @@ define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 add
; SI-LABEL: {{^}}test_convert_fp16_to_fp64:
; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
; SI: V_CVT_F32_F16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
; SI: V_CVT_F64_F32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
; SI: buffer_load_ushort [[VAL:v[0-9]+]]
; SI: v_cvt_f32_f16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
; SI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%val = load i16 addrspace(1)* %in, align 2
%cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone

View File

@@ -3,9 +3,9 @@
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
; SI-LABEL: {{^}}test_convert_fp32_to_fp16:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
; SI: V_CVT_F16_F32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: BUFFER_STORE_SHORT [[RESULT]]
; SI: buffer_load_dword [[VAL:v[0-9]+]]
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_short [[RESULT]]
define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%val = load float addrspace(1)* %in, align 4
%cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone

View File

@@ -4,7 +4,7 @@
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; FUNC-LABEL: @fp_to_sint_f64_i32
; SI: V_CVT_I32_F64_e32
; SI: v_cvt_i32_f64_e32
define void @fp_to_sint_f64_i32(i32 addrspace(1)* %out, double %in) {
%result = fptosi double %in to i32
store i32 %result, i32 addrspace(1)* %out
@@ -12,8 +12,8 @@ define void @fp_to_sint_f64_i32(i32 addrspace(1)* %out, double %in) {
}
; FUNC-LABEL: @fp_to_sint_v2f64_v2i32
; SI: V_CVT_I32_F64_e32
; SI: V_CVT_I32_F64_e32
; SI: v_cvt_i32_f64_e32
; SI: v_cvt_i32_f64_e32
define void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out, <2 x double> %in) {
%result = fptosi <2 x double> %in to <2 x i32>
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
@@ -21,10 +21,10 @@ define void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out, <2 x double> %
}
; FUNC-LABEL: @fp_to_sint_v4f64_v4i32
; SI: V_CVT_I32_F64_e32
; SI: V_CVT_I32_F64_e32
; SI: V_CVT_I32_F64_e32
; SI: V_CVT_I32_F64_e32
; SI: v_cvt_i32_f64_e32
; SI: v_cvt_i32_f64_e32
; SI: v_cvt_i32_f64_e32
; SI: v_cvt_i32_f64_e32
define void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> %in) {
%result = fptosi <4 x double> %in to <4 x i32>
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
@@ -32,20 +32,20 @@ define void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> %
}
; FUNC-LABEL: @fp_to_sint_i64_f64
; CI-DAG: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; CI-DAG: V_TRUNC_F64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
; CI-DAG: S_MOV_B32 s[[K0_LO:[0-9]+]], 0{{$}}
; CI-DAG: S_MOV_B32 s[[K0_HI:[0-9]+]], 0x3df00000
; CI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; CI-DAG: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}}
; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000
; CI-DAG: V_MUL_F64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
; CI-DAG: V_FLOOR_F64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
; CI-DAG: S_MOV_B32 s[[K1_HI:[0-9]+]], 0xc1f00000
; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000
; CI-DAG: V_FMA_F64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
; CI-DAG: V_CVT_U32_F64_e32 v[[LO:[0-9]+]], [[FMA]]
; CI-DAG: V_CVT_I32_F64_e32 v[[HI:[0-9]+]], [[FLOOR]]
; CI: BUFFER_STORE_DWORDX2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
; CI-DAG: v_cvt_i32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr double addrspace(1)* %in, i32 %tid

View File

@@ -3,8 +3,8 @@
; FUNC-LABEL: {{^}}fp_to_sint_i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; SI: V_CVT_I32_F32_e32
; SI: S_ENDPGM
; SI: v_cvt_i32_f32_e32
; SI: s_endpgm
define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) {
%conv = fptosi float %in to i32
store i32 %conv, i32 addrspace(1)* %out
@@ -14,8 +14,8 @@ define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) {
; FUNC-LABEL: {{^}}fp_to_sint_v2i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; SI: V_CVT_I32_F32_e32
; SI: V_CVT_I32_F32_e32
; SI: v_cvt_i32_f32_e32
; SI: v_cvt_i32_f32_e32
define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
%result = fptosi <2 x float> %in to <2 x i32>
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
@@ -27,10 +27,10 @@ define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; SI: V_CVT_I32_F32_e32
; SI: V_CVT_I32_F32_e32
; SI: V_CVT_I32_F32_e32
; SI: V_CVT_I32_F32_e32
; SI: v_cvt_i32_f32_e32
; SI: v_cvt_i32_f32_e32
; SI: v_cvt_i32_f32_e32
; SI: v_cvt_i32_f32_e32
define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%value = load <4 x float> addrspace(1) * %in
%result = fptosi <4 x float> %value to <4 x i32>
@@ -63,7 +63,7 @@ define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac
; EG-DAG: CNDE_INT
; Check that the compiler doesn't crash with a "cannot select" error
; SI: S_ENDPGM
; SI: s_endpgm
define void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) {
entry:
%0 = fptosi float %in to i64
@@ -115,7 +115,7 @@ entry:
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; SI: S_ENDPGM
; SI: s_endpgm
define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
%conv = fptosi <2 x float> %x to <2 x i64>
store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
@@ -208,7 +208,7 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; SI: S_ENDPGM
; SI: s_endpgm
define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
%conv = fptosi <4 x float> %x to <4 x i64>
store <4 x i64> %conv, <4 x i64> addrspace(1)* %out

View File

@@ -4,7 +4,7 @@
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI-LABEL: {{^}}fp_to_uint_i32_f64:
; SI: V_CVT_U32_F64_e32
; SI: v_cvt_u32_f64_e32
define void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) {
%cast = fptoui double %in to i32
store i32 %cast, i32 addrspace(1)* %out, align 4
@@ -12,8 +12,8 @@ define void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) {
}
; SI-LABEL: @fp_to_uint_v2i32_v2f64
; SI: V_CVT_U32_F64_e32
; SI: V_CVT_U32_F64_e32
; SI: v_cvt_u32_f64_e32
; SI: v_cvt_u32_f64_e32
define void @fp_to_uint_v2i32_v2f64(<2 x i32> addrspace(1)* %out, <2 x double> %in) {
%cast = fptoui <2 x double> %in to <2 x i32>
store <2 x i32> %cast, <2 x i32> addrspace(1)* %out, align 8
@@ -21,10 +21,10 @@ define void @fp_to_uint_v2i32_v2f64(<2 x i32> addrspace(1)* %out, <2 x double> %
}
; SI-LABEL: @fp_to_uint_v4i32_v4f64
; SI: V_CVT_U32_F64_e32
; SI: V_CVT_U32_F64_e32
; SI: V_CVT_U32_F64_e32
; SI: V_CVT_U32_F64_e32
; SI: v_cvt_u32_f64_e32
; SI: v_cvt_u32_f64_e32
; SI: v_cvt_u32_f64_e32
; SI: v_cvt_u32_f64_e32
define void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, <4 x double> %in) {
%cast = fptoui <4 x double> %in to <4 x i32>
store <4 x i32> %cast, <4 x i32> addrspace(1)* %out, align 8
@@ -32,20 +32,20 @@ define void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, <4 x double> %
}
; FUNC-LABEL: @fp_to_uint_i64_f64
; CI-DAG: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; CI-DAG: V_TRUNC_F64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
; CI-DAG: S_MOV_B32 s[[K0_LO:[0-9]+]], 0{{$}}
; CI-DAG: S_MOV_B32 s[[K0_HI:[0-9]+]], 0x3df00000
; CI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; CI-DAG: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}}
; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000
; CI-DAG: V_MUL_F64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
; CI-DAG: V_FLOOR_F64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
; CI-DAG: S_MOV_B32 s[[K1_HI:[0-9]+]], 0xc1f00000
; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000
; CI-DAG: V_FMA_F64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
; CI-DAG: V_CVT_U32_F64_e32 v[[LO:[0-9]+]], [[FMA]]
; CI-DAG: V_CVT_U32_F64_e32 v[[HI:[0-9]+]], [[FLOOR]]
; CI: BUFFER_STORE_DWORDX2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
; CI-DAG: v_cvt_u32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep = getelementptr double addrspace(1)* %in, i32 %tid

View File

@@ -3,8 +3,8 @@
; FUNC-LABEL: {{^}}fp_to_uint_i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; SI: V_CVT_U32_F32_e32
; SI: S_ENDPGM
; SI: v_cvt_u32_f32_e32
; SI: s_endpgm
define void @fp_to_uint_i32 (i32 addrspace(1)* %out, float %in) {
%conv = fptoui float %in to i32
store i32 %conv, i32 addrspace(1)* %out
@@ -14,8 +14,8 @@ define void @fp_to_uint_i32 (i32 addrspace(1)* %out, float %in) {
; FUNC-LABEL: {{^}}fp_to_uint_v2i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; SI: V_CVT_U32_F32_e32
; SI: V_CVT_U32_F32_e32
; SI: v_cvt_u32_f32_e32
; SI: v_cvt_u32_f32_e32
define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
%result = fptoui <2 x float> %in to <2 x i32>
@@ -28,10 +28,10 @@ define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; SI: V_CVT_U32_F32_e32
; SI: V_CVT_U32_F32_e32
; SI: V_CVT_U32_F32_e32
; SI: V_CVT_U32_F32_e32
; SI: v_cvt_u32_f32_e32
; SI: v_cvt_u32_f32_e32
; SI: v_cvt_u32_f32_e32
; SI: v_cvt_u32_f32_e32
define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%value = load <4 x float> addrspace(1) * %in
@@ -63,7 +63,7 @@ define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; SI: S_ENDPGM
; SI: s_endpgm
define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) {
%conv = fptoui float %x to i64
store i64 %conv, i64 addrspace(1)* %out
@@ -114,7 +114,7 @@ define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) {
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; SI: S_ENDPGM
; SI: s_endpgm
define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
%conv = fptoui <2 x float> %x to <2 x i64>
store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
@@ -207,7 +207,7 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
; SI: S_ENDPGM
; SI: s_endpgm
define void @fp_to_uint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
%conv = fptoui <4 x float> %x to <4 x i64>
store <4 x i64> %conv, <4 x i64> addrspace(1)* %out

View File

@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
; CHECK: {{^}}fpext:
; CHECK: V_CVT_F64_F32_e32
; CHECK: v_cvt_f64_f32_e32
define void @fpext(double addrspace(1)* %out, float %in) {
%result = fpext float %in to double
store double %result, double addrspace(1)* %out

View File

@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
; CHECK: {{^}}fptrunc:
; CHECK: V_CVT_F32_F64_e32
; CHECK: v_cvt_f32_f64_e32
define void @fptrunc(float addrspace(1)* %out, double %in) {
%result = fptrunc double %in to float
store float %result, float addrspace(1)* %out

View File

@@ -1,16 +1,16 @@
; RUN: llc -march=r600 -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}frem_f32:
; SI-DAG: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{.*$}}
; SI-DAG: BUFFER_LOAD_DWORD [[Y:v[0-9]+]], {{.*}} offset:0x10
; SI-DAG: V_CMP
; SI-DAG: V_MUL_F32
; SI: V_RCP_F32_e32
; SI: V_MUL_F32_e32
; SI: V_MUL_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_MAD_F32
; SI: S_ENDPGM
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:0x10
; SI-DAG: v_cmp
; SI-DAG: v_mul_f32
; SI: v_rcp_f32_e32
; SI: v_mul_f32_e32
; SI: v_mul_f32_e32
; SI: v_trunc_f32_e32
; SI: v_mad_f32
; SI: s_endpgm
define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2) #0 {
%gep2 = getelementptr float addrspace(1)* %in2, i32 4
@@ -22,14 +22,14 @@ define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
}
; FUNC-LABEL: {{^}}unsafe_frem_f32:
; SI: BUFFER_LOAD_DWORD [[Y:v[0-9]+]], {{.*}} offset:0x10
; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{.*}}
; SI: V_RCP_F32_e32 [[INVY:v[0-9]+]], [[Y]]
; SI: V_MUL_F32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
; SI: V_TRUNC_F32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
; SI: V_MAD_F32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:0x10
; SI: buffer_load_dword [[X:v[0-9]+]], {{.*}}
; SI: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
; SI: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2) #1 {
%gep2 = getelementptr float addrspace(1)* %in2, i32 4
@@ -44,7 +44,7 @@ define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
; correctly
; FUNC-LABEL: {{^}}frem_f64:
; SI: S_ENDPGM
; SI: s_endpgm
define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) #0 {
%r0 = load double addrspace(1)* %in1, align 8
@@ -55,11 +55,11 @@ define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
}
; FUNC-LABEL: {{^}}unsafe_frem_f64:
; SI: V_RCP_F64_e32
; SI: V_MUL_F64
; SI: V_BFE_U32
; SI: V_FMA_F64
; SI: S_ENDPGM
; SI: v_rcp_f64_e32
; SI: v_mul_f64
; SI: v_bfe_u32
; SI: v_fma_f64
; SI: s_endpgm
define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) #1 {
%r0 = load double addrspace(1)* %in1, align 8

View File

@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}fsqrt_f32:
; CHECK: V_SQRT_F32_e32 {{v[0-9]+, v[0-9]+}}
; CHECK: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}}
define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%r0 = load float addrspace(1)* %in
@@ -11,7 +11,7 @@ define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
}
; CHECK: {{^}}fsqrt_f64:
; CHECK: V_SQRT_F64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; CHECK: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%r0 = load double addrspace(1)* %in

View File

@@ -3,7 +3,7 @@
; FUNC-LABEL: {{^}}v_fsub_f32:
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%b_ptr = getelementptr float addrspace(1)* %in, i32 1
%a = load float addrspace(1)* %in, align 4
@@ -16,7 +16,7 @@ define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
; FUNC-LABEL: {{^}}s_fsub_f32:
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W
; SI: V_SUB_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define void @s_fsub_f32(float addrspace(1)* %out, float %a, float %b) {
%sub = fsub float %a, %b
store float %sub, float addrspace(1)* %out, align 4
@@ -32,8 +32,8 @@ declare void @llvm.AMDGPU.store.output(float, i32)
; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
; FIXME: Should be using SGPR directly for first operand
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
%sub = fsub <2 x float> %a, %b
store <2 x float> %sub, <2 x float> addrspace(1)* %out, align 8
@@ -46,10 +46,10 @@ define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x flo
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float> addrspace(1)* %in, align 16
@@ -62,11 +62,11 @@ define void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(
; FIXME: Should be using SGPR directly for first operand
; FUNC-LABEL: {{^}}s_fsub_v4f32:
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: S_ENDPGM
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: s_endpgm
define void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
%result = fsub <4 x float> %a, %b
store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16

View File

@@ -1,7 +1,7 @@
; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}fsub_f64:
; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1

View File

@@ -9,9 +9,9 @@ declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone
declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone
; FUNC-LABEL: {{^}}v_ftrunc_f64:
; CI: V_TRUNC_F64
; SI: V_BFE_U32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11
; SI: S_ENDPGM
; CI: v_trunc_f64
; SI: v_bfe_u32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11
; SI: s_endpgm
define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%x = load double addrspace(1)* %in, align 8
%y = call double @llvm.trunc.f64(double %x) nounwind readnone
@@ -20,21 +20,21 @@ define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
}
; FUNC-LABEL: {{^}}ftrunc_f64:
; CI: V_TRUNC_F64_e32
; CI: v_trunc_f64_e32
; SI: S_BFE_U32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: S_LSHR_B64
; SI: S_NOT_B64
; SI: S_AND_B64
; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI: CMP_LT_I32
; SI: CNDMASK_B32
; SI: CNDMASK_B32
; SI: CMP_GT_I32
; SI: CNDMASK_B32
; SI: CNDMASK_B32
; SI: S_ENDPGM
; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: s_lshr_b64
; SI: s_not_b64
; SI: s_and_b64
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI: cmp_lt_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: cmp_gt_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: s_endpgm
define void @ftrunc_f64(double addrspace(1)* %out, double %x) {
%y = call double @llvm.trunc.f64(double %x) nounwind readnone
store double %y, double addrspace(1)* %out
@@ -42,8 +42,8 @@ define void @ftrunc_f64(double addrspace(1)* %out, double %x) {
}
; FUNC-LABEL: {{^}}ftrunc_v2f64:
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
%y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) nounwind readnone
store <2 x double> %y, <2 x double> addrspace(1)* %out
@@ -51,9 +51,9 @@ define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
}
; FIXME-FUNC-LABEL: {{^}}ftrunc_v3f64:
; FIXME-CI: V_TRUNC_F64_e32
; FIXME-CI: V_TRUNC_F64_e32
; FIXME-CI: V_TRUNC_F64_e32
; FIXME-CI: v_trunc_f64_e32
; FIXME-CI: v_trunc_f64_e32
; FIXME-CI: v_trunc_f64_e32
; define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
; %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) nounwind readnone
; store <3 x double> %y, <3 x double> addrspace(1)* %out
@@ -61,10 +61,10 @@ define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
; }
; FUNC-LABEL: {{^}}ftrunc_v4f64:
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
%y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
store <4 x double> %y, <4 x double> addrspace(1)* %out
@@ -72,14 +72,14 @@ define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
}
; FUNC-LABEL: {{^}}ftrunc_v8f64:
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
%y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) nounwind readnone
store <8 x double> %y, <8 x double> addrspace(1)* %out
@@ -87,22 +87,22 @@ define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
}
; FUNC-LABEL: {{^}}ftrunc_v16f64:
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: V_TRUNC_F64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
; CI: v_trunc_f64_e32
define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
%y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) nounwind readnone
store <16 x double> %y, <16 x double> addrspace(1)* %out

View File

@@ -10,7 +10,7 @@ declare <16 x float> @llvm.trunc.v16f32(<16 x float>) nounwind readnone
; FUNC-LABEL: {{^}}ftrunc_f32:
; EG: TRUNC
; SI: V_TRUNC_F32_e32
; SI: v_trunc_f32_e32
define void @ftrunc_f32(float addrspace(1)* %out, float %x) {
%y = call float @llvm.trunc.f32(float %x) nounwind readnone
store float %y, float addrspace(1)* %out
@@ -20,8 +20,8 @@ define void @ftrunc_f32(float addrspace(1)* %out, float %x) {
; FUNC-LABEL: {{^}}ftrunc_v2f32:
; EG: TRUNC
; EG: TRUNC
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
%y = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x) nounwind readnone
store <2 x float> %y, <2 x float> addrspace(1)* %out
@@ -32,9 +32,9 @@ define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
; FIXME-EG: TRUNC
; FIXME-EG: TRUNC
; FIXME-EG: TRUNC
; FIXME-SI: V_TRUNC_F32_e32
; FIXME-SI: V_TRUNC_F32_e32
; FIXME-SI: V_TRUNC_F32_e32
; FIXME-SI: v_trunc_f32_e32
; FIXME-SI: v_trunc_f32_e32
; FIXME-SI: v_trunc_f32_e32
; define void @ftrunc_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
; %y = call <3 x float> @llvm.trunc.v3f32(<3 x float> %x) nounwind readnone
; store <3 x float> %y, <3 x float> addrspace(1)* %out
@@ -46,10 +46,10 @@ define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
define void @ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
%y = call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
store <4 x float> %y, <4 x float> addrspace(1)* %out
@@ -65,14 +65,14 @@ define void @ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
%y = call <8 x float> @llvm.trunc.v8f32(<8 x float> %x) nounwind readnone
store <8 x float> %y, <8 x float> addrspace(1)* %out
@@ -96,22 +96,22 @@ define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: V_TRUNC_F32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
; SI: v_trunc_f32_e32
define void @ftrunc_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) {
%y = call <16 x float> @llvm.trunc.v16f32(<16 x float> %x) nounwind readnone
store <16 x float> %y, <16 x float> addrspace(1)* %out

View File

@@ -3,8 +3,8 @@
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: {{^}}use_gep_address_space:
; CHECK: V_MOV_B32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}} offset:64
; CHECK: v_mov_b32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: ds_write_b32 [[PTR]], v{{[0-9]+}} offset:64
%p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
store i32 99, i32 addrspace(3)* %p
ret void
@@ -14,9 +14,9 @@ define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %arra
; CHECK-LABEL: {{^}}use_gep_address_space_large_offset:
; The LDS offset will be 65536 bytes, which is larger than the size of LDS on
; SI, which is why it is being OR'd with the base pointer.
; SI: S_OR_B32
; CI: S_ADD_I32
; CHECK: DS_WRITE_B32
; SI: s_or_b32
; CI: s_add_i32
; CHECK: ds_write_b32
%p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16384
store i32 99, i32 addrspace(3)* %p
ret void
@@ -24,10 +24,10 @@ define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %arra
define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind {
; CHECK-LABEL: {{^}}gep_as_vector_v4:
; CHECK: S_ADD_I32
; CHECK: S_ADD_I32
; CHECK: S_ADD_I32
; CHECK: S_ADD_I32
; CHECK: s_add_i32
; CHECK: s_add_i32
; CHECK: s_add_i32
; CHECK: s_add_i32
%p = getelementptr <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
%p0 = extractelement <4 x i32 addrspace(3)*> %p, i32 0
%p1 = extractelement <4 x i32 addrspace(3)*> %p, i32 1
@@ -42,8 +42,8 @@ define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind
define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind {
; CHECK-LABEL: {{^}}gep_as_vector_v2:
; CHECK: S_ADD_I32
; CHECK: S_ADD_I32
; CHECK: s_add_i32
; CHECK: s_add_i32
%p = getelementptr <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> <i16 16, i16 16>
%p0 = extractelement <2 x i32 addrspace(3)*> %p, i32 0
%p1 = extractelement <2 x i32 addrspace(3)*> %p, i32 1

View File

@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; FUNC-LABEL: {{^}}atomic_add_i32_offset:
; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -10,8 +10,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset:
; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -21,7 +21,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset:
; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -31,8 +31,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -43,7 +43,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_add_i32:
; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
@@ -51,8 +51,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_add_i32_ret:
; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
@@ -61,7 +61,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_add_i32_addr64:
; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -70,8 +70,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64:
; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -81,7 +81,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32_offset:
; SI: BUFFER_ATOMIC_AND v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -90,8 +90,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset:
; SI: BUFFER_ATOMIC_AND [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -101,7 +101,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset:
; SI: BUFFER_ATOMIC_AND v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -111,8 +111,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_AND [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -123,7 +123,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32:
; SI: BUFFER_ATOMIC_AND v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
@@ -131,8 +131,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32_ret:
; SI: BUFFER_ATOMIC_AND [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
@@ -141,7 +141,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32_addr64:
; SI: BUFFER_ATOMIC_AND v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -150,8 +150,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64:
; SI: BUFFER_ATOMIC_AND [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -161,7 +161,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32_offset:
; SI: BUFFER_ATOMIC_SUB v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -170,8 +170,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset:
; SI: BUFFER_ATOMIC_SUB [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -181,7 +181,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; SI: BUFFER_ATOMIC_SUB v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -191,8 +191,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_SUB [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -203,7 +203,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32:
; SI: BUFFER_ATOMIC_SUB v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
@@ -211,8 +211,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32_ret:
; SI: BUFFER_ATOMIC_SUB [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
@@ -221,7 +221,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32_addr64:
; SI: BUFFER_ATOMIC_SUB v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -230,8 +230,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; SI: BUFFER_ATOMIC_SUB [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -241,7 +241,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32_offset:
; SI: BUFFER_ATOMIC_SMAX v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -250,8 +250,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset:
; SI: BUFFER_ATOMIC_SMAX [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -261,7 +261,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset:
; SI: BUFFER_ATOMIC_SMAX v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -271,8 +271,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_SMAX [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -283,7 +283,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32:
; SI: BUFFER_ATOMIC_SMAX v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
@@ -291,8 +291,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32_ret:
; SI: BUFFER_ATOMIC_SMAX [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
@@ -301,7 +301,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32_addr64:
; SI: BUFFER_ATOMIC_SMAX v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -310,8 +310,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64:
; SI: BUFFER_ATOMIC_SMAX [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -321,7 +321,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32_offset:
; SI: BUFFER_ATOMIC_UMAX v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -330,8 +330,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset:
; SI: BUFFER_ATOMIC_UMAX [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -341,7 +341,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; SI: BUFFER_ATOMIC_UMAX v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -351,8 +351,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_UMAX [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -363,7 +363,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32:
; SI: BUFFER_ATOMIC_UMAX v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
@@ -371,8 +371,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32_ret:
; SI: BUFFER_ATOMIC_UMAX [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
@@ -381,7 +381,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32_addr64:
; SI: BUFFER_ATOMIC_UMAX v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -390,8 +390,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; SI: BUFFER_ATOMIC_UMAX [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -401,7 +401,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32_offset:
; SI: BUFFER_ATOMIC_SMIN v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -410,8 +410,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset:
; SI: BUFFER_ATOMIC_SMIN [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -421,7 +421,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset:
; SI: BUFFER_ATOMIC_SMIN v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -431,8 +431,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_SMIN [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -443,7 +443,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32:
; SI: BUFFER_ATOMIC_SMIN v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
@@ -451,8 +451,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32_ret:
; SI: BUFFER_ATOMIC_SMIN [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
@@ -461,7 +461,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32_addr64:
; SI: BUFFER_ATOMIC_SMIN v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -470,8 +470,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64:
; SI: BUFFER_ATOMIC_SMIN [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -481,7 +481,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32_offset:
; SI: BUFFER_ATOMIC_UMIN v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -490,8 +490,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset:
; SI: BUFFER_ATOMIC_UMIN [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -501,7 +501,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; SI: BUFFER_ATOMIC_UMIN v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -511,8 +511,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_UMIN [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -523,7 +523,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32:
; SI: BUFFER_ATOMIC_UMIN v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
@@ -531,8 +531,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32_ret:
; SI: BUFFER_ATOMIC_UMIN [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
@@ -541,7 +541,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32_addr64:
; SI: BUFFER_ATOMIC_UMIN v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -550,8 +550,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; SI: BUFFER_ATOMIC_UMIN [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -561,7 +561,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32_offset:
; SI: BUFFER_ATOMIC_OR v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -570,8 +570,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset:
; SI: BUFFER_ATOMIC_OR [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -581,7 +581,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset:
; SI: BUFFER_ATOMIC_OR v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -591,8 +591,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_OR [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -603,7 +603,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32:
; SI: BUFFER_ATOMIC_OR v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
@@ -611,8 +611,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32_ret:
; SI: BUFFER_ATOMIC_OR [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
@@ -621,7 +621,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32_addr64:
; SI: BUFFER_ATOMIC_OR v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -630,8 +630,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64:
; SI: BUFFER_ATOMIC_OR [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -641,7 +641,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_offset:
; SI: BUFFER_ATOMIC_SWAP v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -650,8 +650,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset:
; SI: BUFFER_ATOMIC_SWAP [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -661,7 +661,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
; SI: BUFFER_ATOMIC_SWAP v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -671,8 +671,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_SWAP [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -683,7 +683,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32:
; SI: BUFFER_ATOMIC_SWAP v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
@@ -691,8 +691,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret:
; SI: BUFFER_ATOMIC_SWAP [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
@@ -701,7 +701,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64:
; SI: BUFFER_ATOMIC_SWAP v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -710,8 +710,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
; SI: BUFFER_ATOMIC_SWAP [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -721,7 +721,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
; SI: BUFFER_ATOMIC_XOR v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}}
define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -730,8 +730,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset:
; SI: BUFFER_ATOMIC_XOR [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%gep = getelementptr i32 addrspace(1)* %out, i32 4
@@ -741,7 +741,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; SI: BUFFER_ATOMIC_XOR v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}}
define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -751,8 +751,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
; SI: BUFFER_ATOMIC_XOR [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -763,7 +763,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32:
; SI: BUFFER_ATOMIC_XOR v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
@@ -771,8 +771,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32_ret:
; SI: BUFFER_ATOMIC_XOR [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
@@ -781,7 +781,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64:
; SI: BUFFER_ATOMIC_XOR v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
@@ -790,8 +790,8 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64:
; SI: BUFFER_ATOMIC_XOR [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index

View File

@@ -6,8 +6,8 @@
; FUNC-LABEL: {{^}}test_i8:
; EG: CF_END
; SI: BUFFER_STORE_BYTE
; SI: S_ENDPGM
; SI: buffer_store_byte
; SI: s_endpgm
define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
%arrayidx = getelementptr inbounds [1 x i8] addrspace(2)* @a, i32 0, i32 %s
%1 = load i8 addrspace(2)* %arrayidx, align 1
@@ -19,8 +19,8 @@ define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
; FUNC-LABEL: {{^}}test_i16:
; EG: CF_END
; SI: BUFFER_STORE_SHORT
; SI: S_ENDPGM
; SI: buffer_store_short
; SI: s_endpgm
define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
%arrayidx = getelementptr inbounds [1 x i16] addrspace(2)* @b, i32 0, i32 %s
%1 = load i16 addrspace(2)* %arrayidx, align 2

View File

@@ -7,8 +7,8 @@
@float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
; FUNC-LABEL: {{^}}float:
; FIXME: We should be using S_LOAD_DWORD here.
; SI: BUFFER_LOAD_DWORD
; FIXME: We should be using s_load_dword here.
; SI: buffer_load_dword
; EG-DAG: MOV {{\** *}}T2.X
; EG-DAG: MOV {{\** *}}T3.X
@@ -29,8 +29,8 @@ entry:
; FUNC-LABEL: {{^}}i32:
; FIXME: We should be using S_LOAD_DWORD here.
; SI: BUFFER_LOAD_DWORD
; FIXME: We should be using s_load_dword here.
; SI: buffer_load_dword
; EG-DAG: MOV {{\** *}}T2.X
; EG-DAG: MOV {{\** *}}T3.X
@@ -53,7 +53,7 @@ entry:
@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
; FUNC-LABEL: {{^}}struct_foo_gv_load:
; SI: S_LOAD_DWORD
; SI: s_load_dword
define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
@@ -68,8 +68,8 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
<1 x i32> <i32 4> ]
; FUNC-LABEL: {{^}}array_v1_gv_load:
; FIXME: We should be using S_LOAD_DWORD here.
; SI: BUFFER_LOAD_DWORD
; FIXME: We should be using s_load_dword here.
; SI: buffer_load_dword
define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
%load = load <1 x i32> addrspace(2)* %gep, align 4

View File

@@ -2,8 +2,8 @@
define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_load_store:
; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
; CHECK: BUFFER_STORE_SHORT [[TMP]]
; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
; CHECK: buffer_store_short [[TMP]]
%val = load half addrspace(1)* %in
store half %val, half addrspace(1) * %out
ret void
@@ -11,8 +11,8 @@ define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_bitcast_from_half:
; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
; CHECK: BUFFER_STORE_SHORT [[TMP]]
; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
; CHECK: buffer_store_short [[TMP]]
%val = load half addrspace(1) * %in
%val_int = bitcast half %val to i16
store i16 %val_int, i16 addrspace(1)* %out
@@ -21,8 +21,8 @@ define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %o
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) {
; CHECK-LABEL: {{^}}test_bitcast_to_half:
; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
; CHECK: BUFFER_STORE_SHORT [[TMP]]
; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
; CHECK: buffer_store_short [[TMP]]
%val = load i16 addrspace(1)* %in
%val_fp = bitcast i16 %val to half
store half %val_fp, half addrspace(1)* %out
@@ -31,7 +31,7 @@ define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in
define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_extend32:
; CHECK: V_CVT_F32_F16_e32
; CHECK: v_cvt_f32_f16_e32
%val16 = load half addrspace(1)* %in
%val32 = fpext half %val16 to float
@@ -41,8 +41,8 @@ define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_extend64:
; CHECK: V_CVT_F32_F16_e32
; CHECK: V_CVT_F64_F32_e32
; CHECK: v_cvt_f32_f16_e32
; CHECK: v_cvt_f64_f32_e32
%val16 = load half addrspace(1)* %in
%val64 = fpext half %val16 to double
@@ -52,7 +52,7 @@ define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
define void @test_trunc32(float addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_trunc32:
; CHECK: V_CVT_F16_F32_e32
; CHECK: v_cvt_f16_f32_e32
%val32 = load float addrspace(1)* %in
%val16 = fptrunc float %val32 to half

View File

@@ -1,7 +1,7 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}test_i64_eq:
; SI: V_CMP_EQ_I64
; SI: v_cmp_eq_i64
define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp eq i64 %a, %b
%result = sext i1 %cmp to i32
@@ -10,7 +10,7 @@ define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}
; SI-LABEL: {{^}}test_i64_ne:
; SI: V_CMP_NE_I64
; SI: v_cmp_ne_i64
define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ne i64 %a, %b
%result = sext i1 %cmp to i32
@@ -19,7 +19,7 @@ define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}
; SI-LABEL: {{^}}test_i64_slt:
; SI: V_CMP_LT_I64
; SI: v_cmp_lt_i64
define void @test_i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp slt i64 %a, %b
%result = sext i1 %cmp to i32
@@ -28,7 +28,7 @@ define void @test_i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}
; SI-LABEL: {{^}}test_i64_ult:
; SI: V_CMP_LT_U64
; SI: v_cmp_lt_u64
define void @test_i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ult i64 %a, %b
%result = sext i1 %cmp to i32
@@ -37,7 +37,7 @@ define void @test_i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}
; SI-LABEL: {{^}}test_i64_sle:
; SI: V_CMP_LE_I64
; SI: v_cmp_le_i64
define void @test_i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp sle i64 %a, %b
%result = sext i1 %cmp to i32
@@ -46,7 +46,7 @@ define void @test_i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}
; SI-LABEL: {{^}}test_i64_ule:
; SI: V_CMP_LE_U64
; SI: v_cmp_le_u64
define void @test_i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ule i64 %a, %b
%result = sext i1 %cmp to i32
@@ -55,7 +55,7 @@ define void @test_i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}
; SI-LABEL: {{^}}test_i64_sgt:
; SI: V_CMP_GT_I64
; SI: v_cmp_gt_i64
define void @test_i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp sgt i64 %a, %b
%result = sext i1 %cmp to i32
@@ -64,7 +64,7 @@ define void @test_i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}
; SI-LABEL: {{^}}test_i64_ugt:
; SI: V_CMP_GT_U64
; SI: v_cmp_gt_u64
define void @test_i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ugt i64 %a, %b
%result = sext i1 %cmp to i32
@@ -73,7 +73,7 @@ define void @test_i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}
; SI-LABEL: {{^}}test_i64_sge:
; SI: V_CMP_GE_I64
; SI: v_cmp_ge_i64
define void @test_i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp sge i64 %a, %b
%result = sext i1 %cmp to i32
@@ -82,7 +82,7 @@ define void @test_i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
}
; SI-LABEL: {{^}}test_i64_uge:
; SI: V_CMP_GE_U64
; SI: v_cmp_ge_u64
define void @test_i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp uge i64 %a, %b
%result = sext i1 %cmp to i32

View File

@@ -2,9 +2,9 @@
; Use a 64-bit value with lo bits that can be represented as an inline constant
; CHECK-LABEL: {{^}}i64_imm_inline_lo:
; CHECK: S_MOV_B32 [[LO:s[0-9]+]], 5
; CHECK: V_MOV_B32_e32 v[[LO_VGPR:[0-9]+]], [[LO]]
; CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[LO_VGPR]]:
; CHECK: s_mov_b32 [[LO:s[0-9]+]], 5
; CHECK: v_mov_b32_e32 v[[LO_VGPR:[0-9]+]], [[LO]]
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VGPR]]:
define void @i64_imm_inline_lo(i64 addrspace(1) *%out) {
entry:
store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005
@@ -13,9 +13,9 @@ entry:
; Use a 64-bit value with hi bits that can be represented as an inline constant
; CHECK-LABEL: {{^}}i64_imm_inline_hi:
; CHECK: S_MOV_B32 [[HI:s[0-9]+]], 5
; CHECK: V_MOV_B32_e32 v[[HI_VGPR:[0-9]+]], [[HI]]
; CHECK: BUFFER_STORE_DWORDX2 v{{\[[0-9]+:}}[[HI_VGPR]]
; CHECK: s_mov_b32 [[HI:s[0-9]+]], 5
; CHECK: v_mov_b32_e32 v[[HI_VGPR:[0-9]+]], [[HI]]
; CHECK: buffer_store_dwordx2 v{{\[[0-9]+:}}[[HI_VGPR]]
define void @i64_imm_inline_hi(i64 addrspace(1) *%out) {
entry:
store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678
@@ -23,89 +23,89 @@ entry:
}
; CHECK-LABEL: {{^}}store_inline_imm_0.0_f32
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
store float 0.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0.5{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0.5{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
store float 0.5, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f32
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -0.5{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -0.5{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
store float -0.5, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_1.0_f32
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
store float 1.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f32
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -1.0{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
store float -1.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_2.0_f32
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 2.0{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
store float 2.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f32
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -2.0{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -2.0{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
store float -2.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_4.0_f32
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 4.0{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 4.0{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
store float 4.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f32
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -4.0{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -4.0{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
store float -4.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}store_literal_imm_f32:
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x45800000
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x45800000
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @store_literal_imm_f32(float addrspace(1)* %out) {
store float 4096.0, float addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 0.0, [[VAL]]{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.0, [[VAL]]{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0.0
store float %y, float addrspace(1)* %out
@@ -113,9 +113,9 @@ define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0.5
store float %y, float addrspace(1)* %out
@@ -123,9 +123,9 @@ define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -0.5
store float %y, float addrspace(1)* %out
@@ -133,9 +133,9 @@ define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 1.0
store float %y, float addrspace(1)* %out
@@ -143,9 +143,9 @@ define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -1.0
store float %y, float addrspace(1)* %out
@@ -153,9 +153,9 @@ define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 2.0
store float %y, float addrspace(1)* %out
@@ -163,9 +163,9 @@ define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -2.0
store float %y, float addrspace(1)* %out
@@ -173,9 +173,9 @@ define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 4.0
store float %y, float addrspace(1)* %out
@@ -183,9 +183,9 @@ define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
}
; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}}
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}}
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -4.0
store float %y, float addrspace(1)* %out
@@ -193,9 +193,9 @@ define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
}
; CHECK-LABEL: @commute_add_inline_imm_0.5_f32
; CHECK: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
; CHECK: V_ADD_F32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%x = load float addrspace(1)* %in
%y = fadd float %x, 0.5
@@ -204,9 +204,9 @@ define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addr
}
; CHECK-LABEL: @commute_add_literal_f32
; CHECK: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
; CHECK: V_ADD_F32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
; CHECK-NEXT: buffer_store_dword [[REG]]
define void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%x = load float addrspace(1)* %in
%y = fadd float %x, 1024.0

View File

@@ -4,8 +4,8 @@
; indexing of vectors.
; CHECK: extract_w_offset
; CHECK: S_MOV_B32 m0
; CHECK-NEXT: V_MOVRELS_B32_e32
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32
define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
entry:
%0 = add i32 %in, 1
@@ -15,8 +15,8 @@ entry:
}
; CHECK: extract_wo_offset
; CHECK: S_MOV_B32 m0
; CHECK-NEXT: V_MOVRELS_B32_e32
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32
define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
entry:
%0 = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
@@ -25,8 +25,8 @@ entry:
}
; CHECK: insert_w_offset
; CHECK: S_MOV_B32 m0
; CHECK-NEXT: V_MOVRELD_B32_e32
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movreld_b32_e32
define void @insert_w_offset(float addrspace(1)* %out, i32 %in) {
entry:
%0 = add i32 %in, 1
@@ -37,8 +37,8 @@ entry:
}
; CHECK: insert_wo_offset
; CHECK: S_MOV_B32 m0
; CHECK-NEXT: V_MOVRELD_B32_e32
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movreld_b32_e32
define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) {
entry:
%0 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in

View File

@@ -6,11 +6,11 @@ declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
; SI-LABEL: {{^}}private_access_f64_alloca:
; SI-ALLOCA: BUFFER_STORE_DWORDX2
; SI-ALLOCA: BUFFER_LOAD_DWORDX2
; SI-ALLOCA: buffer_store_dwordx2
; SI-ALLOCA: buffer_load_dwordx2
; SI-PROMOTE: DS_WRITE_B64
; SI-PROMOTE: DS_READ_B64
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load double addrspace(1)* %in, align 8
%array = alloca double, i32 16, align 8
@@ -24,17 +24,17 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
; SI-LABEL: {{^}}private_access_v2f64_alloca:
; SI-ALLOCA: BUFFER_STORE_DWORDX4
; SI-ALLOCA: BUFFER_LOAD_DWORDX4
; SI-ALLOCA: buffer_store_dwordx4
; SI-ALLOCA: buffer_load_dwordx4
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load <2 x double> addrspace(1)* %in, align 16
%array = alloca <2 x double>, i32 16, align 16
@@ -48,11 +48,11 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out
; SI-LABEL: {{^}}private_access_i64_alloca:
; SI-ALLOCA: BUFFER_STORE_DWORDX2
; SI-ALLOCA: BUFFER_LOAD_DWORDX2
; SI-ALLOCA: buffer_store_dwordx2
; SI-ALLOCA: buffer_load_dwordx2
; SI-PROMOTE: DS_WRITE_B64
; SI-PROMOTE: DS_READ_B64
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load i64 addrspace(1)* %in, align 8
%array = alloca i64, i32 16, align 8
@@ -66,17 +66,17 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
; SI-LABEL: {{^}}private_access_v2i64_alloca:
; SI-ALLOCA: BUFFER_STORE_DWORDX4
; SI-ALLOCA: BUFFER_LOAD_DWORDX4
; SI-ALLOCA: buffer_store_dwordx4
; SI-ALLOCA: buffer_load_dwordx4
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load <2 x i64> addrspace(1)* %in, align 16
%array = alloca <2 x i64>, i32 16, align 16

View File

@@ -1,11 +1,11 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}infinite_loop:
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x3e7
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
; SI: BB0_1:
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: S_WAITCNT vmcnt(0) expcnt(0)
; SI: S_BRANCH BB0_1
; SI: buffer_store_dword [[REG]]
; SI: s_waitcnt vmcnt(0) expcnt(0)
; SI: s_branch BB0_1
define void @infinite_loop(i32 addrspace(1)* %out) {
entry:
br label %for.body

View File

@@ -9,11 +9,11 @@
; not just directly into the vector component?
; SI-LABEL: {{^}}insertelement_v4f32_0:
; S_LOAD_DWORDX4 s{{[}}[[LOW_REG:[0-9]+]]:
; V_MOV_B32_e32
; V_MOV_B32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
; V_MOV_B32_e32 v[[LOW_REG]], [[CONSTREG]]
; BUFFER_STORE_DWORDX4 v{{[}}[[LOW_REG]]:
; s_load_dwordx4 s{{[}}[[LOW_REG:[0-9]+]]:
; v_mov_b32_e32
; v_mov_b32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
; v_mov_b32_e32 v[[LOW_REG]], [[CONSTREG]]
; buffer_store_dwordx4 v{{[}}[[LOW_REG]]:
define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
@@ -49,9 +49,9 @@ define void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) n
}
; SI-LABEL: {{^}}dynamic_insertelement_v2f32:
; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 0x40a00000
; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[LOW_RESULT_REG]]:
; SI: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
; SI: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; SI: buffer_store_dwordx2 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
%vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 %b
store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 8
@@ -59,9 +59,9 @@ define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x fl
}
; SI-LABEL: {{^}}dynamic_insertelement_v4f32:
; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 0x40a00000
; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; SI: BUFFER_STORE_DWORDX4 {{v\[}}[[LOW_RESULT_REG]]:
; SI: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
; SI: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; SI: buffer_store_dwordx4 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %b
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
@@ -69,8 +69,8 @@ define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x fl
}
; SI-LABEL: {{^}}dynamic_insertelement_v8f32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
%vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
@@ -78,10 +78,10 @@ define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x fl
}
; SI-LABEL: {{^}}dynamic_insertelement_v16f32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
%vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
@@ -89,7 +89,7 @@ define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x
}
; SI-LABEL: {{^}}dynamic_insertelement_v2i32:
; SI: BUFFER_STORE_DWORDX2
; SI: buffer_store_dwordx2
define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <2 x i32> %a, i32 5, i32 %b
store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 8
@@ -97,7 +97,7 @@ define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32>
}
; SI-LABEL: {{^}}dynamic_insertelement_v4i32:
; SI: BUFFER_STORE_DWORDX4
; SI: buffer_store_dwordx4
define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <4 x i32> %a, i32 5, i32 %b
store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
@@ -105,8 +105,8 @@ define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32>
}
; SI-LABEL: {{^}}dynamic_insertelement_v8i32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <8 x i32> %a, i32 5, i32 %b
store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
@@ -114,10 +114,10 @@ define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32>
}
; SI-LABEL: {{^}}dynamic_insertelement_v16i32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <16 x i32> %a, i32 5, i32 %b
store <16 x i32> %vecins, <16 x i32> addrspace(1)* %out, align 64
@@ -126,7 +126,7 @@ define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i
; SI-LABEL: {{^}}dynamic_insertelement_v2i16:
; FIXMESI: BUFFER_STORE_DWORDX2
; FIXMESI: buffer_store_dwordx2
define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
%vecins = insertelement <2 x i16> %a, i16 5, i32 %b
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out, align 8
@@ -134,7 +134,7 @@ define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
}
; SI-LABEL: {{^}}dynamic_insertelement_v4i16:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind {
%vecins = insertelement <4 x i16> %a, i16 5, i32 %b
store <4 x i16> %vecins, <4 x i16> addrspace(1)* %out, align 16
@@ -151,7 +151,7 @@ define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a
}
; SI-LABEL: {{^}}dynamic_insertelement_v4i8:
; FIXMESI: BUFFER_STORE_DWORD
; FIXMESI: buffer_store_dword
define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind {
%vecins = insertelement <4 x i8> %a, i8 5, i32 %b
store <4 x i8> %vecins, <4 x i8> addrspace(1)* %out, align 16
@@ -159,7 +159,7 @@ define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a
}
; SI-LABEL: {{^}}dynamic_insertelement_v8i8:
; FIXMESI: BUFFER_STORE_DWORDX2
; FIXMESI: buffer_store_dwordx2
define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind {
%vecins = insertelement <8 x i8> %a, i8 5, i32 %b
store <8 x i8> %vecins, <8 x i8> addrspace(1)* %out, align 16
@@ -167,7 +167,7 @@ define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a
}
; SI-LABEL: {{^}}dynamic_insertelement_v16i8:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
%vecins = insertelement <16 x i8> %a, i8 5, i32 %b
store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16
@@ -201,11 +201,11 @@ endif:
}
; SI-LABEL: {{^}}dynamic_insertelement_v2f64:
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: S_ENDPGM
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind {
%vecins = insertelement <2 x double> %a, double 8.0, i32 %b
store <2 x double> %vecins, <2 x double> addrspace(1)* %out, align 16
@@ -213,9 +213,9 @@ define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x d
}
; SI-LABEL: {{^}}dynamic_insertelement_v2i64:
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: S_ENDPGM
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind {
%vecins = insertelement <2 x i64> %a, i64 5, i32 %b
store <2 x i64> %vecins, <2 x i64> addrspace(1)* %out, align 8
@@ -223,11 +223,11 @@ define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64>
}
; SI-LABEL: {{^}}dynamic_insertelement_v4f64:
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: S_ENDPGM
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind {
%vecins = insertelement <4 x double> %a, double 8.0, i32 %b
store <4 x double> %vecins, <4 x double> addrspace(1)* %out, align 16
@@ -235,15 +235,15 @@ define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x d
}
; SI-LABEL: {{^}}dynamic_insertelement_v8f64:
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: S_ENDPGM
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind {
%vecins = insertelement <8 x double> %a, double 8.0, i32 %b
store <8 x double> %vecins, <8 x double> addrspace(1)* %out, align 16

View File

@@ -5,7 +5,7 @@
; EG-CHECK-LABEL: {{^}}i8_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i8_arg:
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: buffer_load_ubyte
define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
@@ -17,7 +17,7 @@ entry:
; EG-CHECK-LABEL: {{^}}i8_zext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i8_zext_arg:
; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
@@ -29,7 +29,7 @@ entry:
; EG-CHECK-LABEL: {{^}}i8_sext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i8_sext_arg:
; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
@@ -41,7 +41,7 @@ entry:
; EG-CHECK-LABEL: {{^}}i16_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i16_arg:
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: buffer_load_ushort
define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
@@ -53,7 +53,7 @@ entry:
; EG-CHECK-LABEL: {{^}}i16_zext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i16_zext_arg:
; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
@@ -65,7 +65,7 @@ entry:
; EG-CHECK-LABEL: {{^}}i16_sext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i16_sext_arg:
; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
@@ -77,7 +77,7 @@ entry:
; EG-CHECK-LABEL: {{^}}i32_arg:
; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}i32_arg:
; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
; s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
store i32 %in, i32 addrspace(1)* %out, align 4
@@ -87,7 +87,7 @@ entry:
; EG-CHECK-LABEL: {{^}}f32_arg:
; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: {{^}}f32_arg:
; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
; s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
store float %in, float addrspace(1)* %out, align 4
@@ -98,8 +98,8 @@ entry:
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; SI-CHECK-LABEL: {{^}}v2i8_arg:
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
store <2 x i8> %in, <2 x i8> addrspace(1)* %out
@@ -110,8 +110,8 @@ entry:
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; SI-CHECK-LABEL: {{^}}v2i16_arg:
; SI-CHECK-DAG: BUFFER_LOAD_USHORT
; SI-CHECK-DAG: BUFFER_LOAD_USHORT
; SI-CHECK-DAG: buffer_load_ushort
; SI-CHECK-DAG: buffer_load_ushort
define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
store <2 x i16> %in, <2 x i16> addrspace(1)* %out
@@ -122,7 +122,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI-CHECK-LABEL: {{^}}v2i32_arg:
; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; SI-CHECK: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
@@ -133,7 +133,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI-CHECK-LABEL: {{^}}v2f32_arg:
; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; SI-CHECK: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
@@ -166,7 +166,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI-CHECK-LABEL: {{^}}v3i32_arg:
; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
@@ -178,7 +178,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI-CHECK-LABEL: {{^}}v3f32_arg:
; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
@@ -191,10 +191,10 @@ entry:
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; SI-CHECK-LABEL: {{^}}v4i8_arg:
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
store <4 x i8> %in, <4 x i8> addrspace(1)* %out
@@ -207,10 +207,10 @@ entry:
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; SI-CHECK-LABEL: {{^}}v4i16_arg:
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
store <4 x i16> %in, <4 x i16> addrspace(1)* %out
@@ -223,7 +223,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI-CHECK-LABEL: {{^}}v4i32_arg:
; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
@@ -236,7 +236,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI-CHECK-LABEL: {{^}}v4f32_arg:
; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
@@ -253,13 +253,13 @@ entry:
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; SI-CHECK-LABEL: {{^}}v8i8_arg:
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
store <8 x i8> %in, <8 x i8> addrspace(1)* %out
@@ -276,14 +276,14 @@ entry:
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; SI-CHECK-LABEL: {{^}}v8i16_arg:
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
store <8 x i16> %in, <8 x i16> addrspace(1)* %out
@@ -300,7 +300,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI-CHECK-LABEL: {{^}}v8i32_arg:
; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
; SI-CHECK: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
@@ -317,7 +317,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI-CHECK-LABEL: {{^}}v8f32_arg:
; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
; SI-CHECK: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
@@ -342,22 +342,22 @@ entry:
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; SI-CHECK-LABEL: {{^}}v16i8_arg:
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
; SI-CHECK: buffer_load_ubyte
define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
entry:
store <16 x i8> %in, <16 x i8> addrspace(1)* %out
@@ -382,22 +382,22 @@ entry:
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; SI-CHECK-LABEL: {{^}}v16i16_arg:
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
; SI-CHECK: buffer_load_ushort
define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
store <16 x i16> %in, <16 x i16> addrspace(1)* %out
@@ -422,7 +422,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI-CHECK-LABEL: {{^}}v16i32_arg:
; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
; SI-CHECK: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
@@ -447,7 +447,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI-CHECK-LABEL: {{^}}v16f32_arg:
; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
; SI-CHECK: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
@@ -455,18 +455,18 @@ entry:
}
; FUNC-LABEL: {{^}}kernel_arg_i64:
; SI: S_LOAD_DWORDX2
; SI: S_LOAD_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI: buffer_store_dwordx2
define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
store i64 %a, i64 addrspace(1)* %out, align 8
ret void
}
; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
; XSI: S_LOAD_DWORDX2
; XSI: S_LOAD_DWORDX2
; XSI: BUFFER_STORE_DWORDX2
; XSI: s_load_dwordx2
; XSI: s_load_dwordx2
; XSI: buffer_store_dwordx2
; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
; ret void

View File

@@ -1,5 +1,5 @@
; RUN: llc -march=r600 -mcpu=SI < %s
; CHECK: S_ENDPGM
; CHECK: s_endpgm
@gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4

View File

@@ -7,9 +7,9 @@ declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone
declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone
; FUNC-LABEL: {{^}}s_abs_i32:
; SI: S_SUB_I32
; SI: S_MAX_I32
; SI: S_ENDPGM
; SI: s_sub_i32
; SI: s_max_i32
; SI: s_endpgm
; EG: SUB_INT
; EG: MAX_INT
@@ -20,9 +20,9 @@ define void @s_abs_i32(i32 addrspace(1)* %out, i32 %src) nounwind {
}
; FUNC-LABEL: {{^}}v_abs_i32:
; SI: V_SUB_I32_e32
; SI: V_MAX_I32_e32
; SI: S_ENDPGM
; SI: v_sub_i32_e32
; SI: v_max_i32_e32
; SI: s_endpgm
; EG: SUB_INT
; EG: MAX_INT
@@ -34,9 +34,9 @@ define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind
}
; FUNC-LABEL: {{^}}abs_i32_legacy_amdil:
; SI: V_SUB_I32_e32
; SI: V_MAX_I32_e32
; SI: S_ENDPGM
; SI: v_sub_i32_e32
; SI: v_max_i32_e32
; SI: s_endpgm
; EG: SUB_INT
; EG: MAX_INT

View File

@@ -3,7 +3,7 @@
; FUNC-LABEL: {{^}}test_barrier_global:
; EG: GROUP_BARRIER
; SI: S_BARRIER
; SI: s_barrier
define void @test_barrier_global(i32 addrspace(1)* %out) {
entry:

View File

@@ -3,7 +3,7 @@
; FUNC-LABEL: {{^}}test_barrier_local:
; EG: GROUP_BARRIER
; SI: S_BARRIER
; SI: s_barrier
define void @test_barrier_local(i32 addrspace(1)* %out) {
entry:

View File

@@ -4,7 +4,7 @@
declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}bfe_i32_arg_arg_arg:
; SI: V_BFE_I32
; SI: v_bfe_i32
; EG: BFE_INT
; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
@@ -14,7 +14,7 @@ define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i
}
; FUNC-LABEL: {{^}}bfe_i32_arg_arg_imm:
; SI: V_BFE_I32
; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
@@ -23,7 +23,7 @@ define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) n
}
; FUNC-LABEL: {{^}}bfe_i32_arg_imm_arg:
; SI: V_BFE_I32
; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
@@ -32,7 +32,7 @@ define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) n
}
; FUNC-LABEL: {{^}}bfe_i32_imm_arg_arg:
; SI: V_BFE_I32
; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
@@ -41,7 +41,7 @@ define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n
}
; FUNC-LABEL: {{^}}v_bfe_print_arg:
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
%load = load i32 addrspace(1)* %src0, align 4
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
@@ -50,8 +50,8 @@ define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) no
}
; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_reg_offset:
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
@@ -60,8 +60,8 @@ define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i
}
; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_imm_offset:
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
@@ -70,9 +70,9 @@ define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i
}
; FUNC-LABEL: {{^}}bfe_i32_test_6:
; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: S_ENDPGM
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -82,11 +82,11 @@ define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}
; FUNC-LABEL: {{^}}bfe_i32_test_7:
; SI-NOT: SHL
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: shl
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -97,9 +97,9 @@ define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; FIXME: The shifts should be 1 BFE
; FUNC-LABEL: {{^}}bfe_i32_test_8:
; SI: BUFFER_LOAD_DWORD
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -109,10 +109,10 @@ define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}
; FUNC-LABEL: {{^}}bfe_i32_test_9:
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
@@ -121,10 +121,10 @@ define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}
; FUNC-LABEL: {{^}}bfe_i32_test_10:
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
@@ -133,10 +133,10 @@ define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}
; FUNC-LABEL: {{^}}bfe_i32_test_11:
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
@@ -145,10 +145,10 @@ define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}
; FUNC-LABEL: {{^}}bfe_i32_test_12:
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
@@ -157,9 +157,9 @@ define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}
; FUNC-LABEL: {{^}}bfe_i32_test_13:
; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = ashr i32 %x, 31
@@ -168,9 +168,9 @@ define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}
; FUNC-LABEL: {{^}}bfe_i32_test_14:
; SI-NOT: LSHR
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: lshr
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = lshr i32 %x, 31
@@ -179,10 +179,10 @@ define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_0:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
@@ -191,10 +191,10 @@ define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_1:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
@@ -203,10 +203,10 @@ define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_2:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
@@ -215,10 +215,10 @@ define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_3:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
@@ -227,10 +227,10 @@ define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_4:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
@@ -239,10 +239,10 @@ define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_5:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
@@ -251,10 +251,10 @@ define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_6:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0xffffff80
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0xffffff80
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
@@ -263,10 +263,10 @@ define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_7:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
@@ -275,10 +275,10 @@ define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_8:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
@@ -287,10 +287,10 @@ define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_9:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
@@ -299,10 +299,10 @@ define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_10:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
@@ -311,10 +311,10 @@ define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_11:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -6
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -6
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
@@ -323,10 +323,10 @@ define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_12:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
@@ -335,10 +335,10 @@ define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_13:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
@@ -347,10 +347,10 @@ define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_14:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
@@ -359,10 +359,10 @@ define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_15:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
@@ -371,10 +371,10 @@ define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_16:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
@@ -383,10 +383,10 @@ define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_17:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
@@ -395,10 +395,10 @@ define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_18:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
@@ -409,13 +409,13 @@ define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
; XXX - This should really be a single BFE, but the sext_inreg of the
; extended type i24 is never custom lowered.
; FUNC-LABEL: {{^}}bfe_sext_in_reg_i24:
; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
; SI: V_LSHLREV_B32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
; XSI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
; SI: buffer_load_dword [[LOAD:v[0-9]+]],
; SI: v_lshlrev_b32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
; XSI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
; XSI-NOT: SHL
; XSI-NOT: SHR
; XSI: BUFFER_STORE_DWORD [[BFE]],
; XSI: buffer_store_dword [[BFE]],
define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
@@ -426,12 +426,12 @@ define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
}
; FUNC-LABEL: @simplify_demanded_bfe_sdiv
; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]]
; SI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
; SI: V_LSHRREV_B32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
; SI: V_ADD_I32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
; SI: V_ASHRREV_I32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; SI: BUFFER_STORE_DWORD [[TMP2]]
; SI: buffer_load_dword [[LOAD:v[0-9]+]]
; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; SI: buffer_store_dword [[TMP2]]
define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%src = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone

View File

@@ -4,7 +4,7 @@
declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}bfe_u32_arg_arg_arg:
; SI: V_BFE_U32
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
@@ -13,7 +13,7 @@ define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i
}
; FUNC-LABEL: {{^}}bfe_u32_arg_arg_imm:
; SI: V_BFE_U32
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 123) nounwind readnone
@@ -22,7 +22,7 @@ define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) n
}
; FUNC-LABEL: {{^}}bfe_u32_arg_imm_arg:
; SI: V_BFE_U32
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 123, i32 %src2) nounwind readnone
@@ -31,7 +31,7 @@ define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) n
}
; FUNC-LABEL: {{^}}bfe_u32_imm_arg_arg:
; SI: V_BFE_U32
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 123, i32 %src1, i32 %src2) nounwind readnone
@@ -40,8 +40,8 @@ define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n
}
; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone
@@ -50,8 +50,8 @@ define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i
}
; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone
@@ -60,9 +60,9 @@ define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i
}
; FUNC-LABEL: {{^}}bfe_u32_zextload_i8:
; SI: BUFFER_LOAD_UBYTE
; SI-NOT: BFE
; SI: S_ENDPGM
; SI: buffer_load_ubyte
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%load = load i8 addrspace(1)* %in
%ext = zext i8 %load to i32
@@ -72,11 +72,11 @@ define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) n
}
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
; SI: BUFFER_LOAD_DWORD
; SI: V_ADD_I32
; SI-NEXT: V_AND_B32_e32
; SI-NOT: BFE
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -87,11 +87,11 @@ define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %i
}
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
; SI: BUFFER_LOAD_DWORD
; SI: V_ADD_I32
; SI-NEXT: V_AND_B32_e32
; SI-NOT: BFE
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -102,10 +102,10 @@ define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %
}
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
; SI: BUFFER_LOAD_DWORD
; SI: V_ADD_I32
; SI: BFE
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_add_i32
; SI: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -116,11 +116,11 @@ define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspa
}
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
; SI: BUFFER_LOAD_DWORD
; SI: V_ADD_I32
; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0xf8
; SI-NEXT: BFE
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -131,11 +131,11 @@ define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspa
}
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
; SI: BUFFER_LOAD_DWORD
; SI: V_ADD_I32
; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0x80
; SI-NEXT: BFE
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -146,10 +146,10 @@ define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspa
}
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
; SI: BUFFER_LOAD_DWORD
; SI: V_ADD_I32
; SI-NEXT: BFE
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -160,9 +160,9 @@ define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrsp
}
; FUNC-LABEL: {{^}}bfe_u32_test_1:
; SI: BUFFER_LOAD_DWORD
; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI: s_endpgm
; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
@@ -188,12 +188,12 @@ define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}
; FUNC-LABEL: {{^}}bfe_u32_test_4:
; SI-NOT: LSHL
; SI-NOT: SHR
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: lshl
; SI-NOT: shr
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -204,11 +204,11 @@ define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}
; FUNC-LABEL: {{^}}bfe_u32_test_5:
; SI: BUFFER_LOAD_DWORD
; SI-NOT: LSHL
; SI-NOT: SHR
; SI: V_BFE_I32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
; SI: S_ENDPGM
; SI: buffer_load_dword
; SI-NOT: lshl
; SI-NOT: shr
; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -219,9 +219,9 @@ define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}
; FUNC-LABEL: {{^}}bfe_u32_test_6:
; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: S_ENDPGM
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -231,9 +231,9 @@ define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}
; FUNC-LABEL: {{^}}bfe_u32_test_7:
; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -243,10 +243,10 @@ define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}
; FUNC-LABEL: {{^}}bfe_u32_test_8:
; SI-NOT: BFE
; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -256,10 +256,10 @@ define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}
; FUNC-LABEL: {{^}}bfe_u32_test_9:
; SI-NOT: BFE
; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
@@ -268,10 +268,10 @@ define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}
; FUNC-LABEL: {{^}}bfe_u32_test_10:
; SI-NOT: BFE
; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
@@ -280,10 +280,10 @@ define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}
; FUNC-LABEL: {{^}}bfe_u32_test_11:
; SI-NOT: BFE
; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
@@ -292,10 +292,10 @@ define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}
; FUNC-LABEL: {{^}}bfe_u32_test_12:
; SI-NOT: BFE
; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
@@ -305,8 +305,8 @@ define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; FUNC-LABEL: {{^}}bfe_u32_test_13:
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = ashr i32 %x, 31
@@ -315,9 +315,9 @@ define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}
; FUNC-LABEL: {{^}}bfe_u32_test_14:
; SI-NOT: LSHR
; SI-NOT: BFE
; SI: S_ENDPGM
; SI-NOT: lshr
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = lshr i32 %x, 31
@@ -326,10 +326,10 @@ define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_0:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone
@@ -338,10 +338,10 @@ define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_1:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone
@@ -350,10 +350,10 @@ define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_2:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone
@@ -362,10 +362,10 @@ define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_3:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone
@@ -374,10 +374,10 @@ define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_4:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone
@@ -386,10 +386,10 @@ define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_5:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone
@@ -398,10 +398,10 @@ define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_6:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x80
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone
@@ -410,10 +410,10 @@ define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_7:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone
@@ -422,10 +422,10 @@ define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_8:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone
@@ -434,10 +434,10 @@ define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_9:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFEfppppppppppppp
define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
@@ -446,10 +446,10 @@ define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_10:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone
@@ -458,10 +458,10 @@ define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_11:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone
@@ -470,10 +470,10 @@ define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_12:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone
@@ -482,10 +482,10 @@ define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_13:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone
@@ -494,10 +494,10 @@ define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_14:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone
@@ -506,10 +506,10 @@ define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_15:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone
@@ -518,10 +518,10 @@ define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_16:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone
@@ -530,10 +530,10 @@ define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_17:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
@@ -542,10 +542,10 @@ define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
}
; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_18:
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
@@ -558,12 +558,12 @@ define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
; XXX: The operand to v_bfe_u32 could also just directly be the load register.
; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
; SI: BUFFER_LOAD_DWORD [[ARG:v[0-9]+]]
; SI: V_AND_B32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
; SI: V_BFE_U32 [[BFE:v[0-9]+]], [[AND]], 2, 2
; SI-DAG: BUFFER_STORE_DWORD [[AND]]
; SI-DAG: BUFFER_STORE_DWORD [[BFE]]
; SI: S_ENDPGM
; SI: buffer_load_dword [[ARG:v[0-9]+]]
; SI: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
; SI: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
; SI-DAG: buffer_store_dword [[AND]]
; SI-DAG: buffer_store_dword [[BFE]]
; SI: s_endpgm
define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
i32 addrspace(1)* %out1,
i32 addrspace(1)* %in) nounwind {

View File

@@ -4,7 +4,7 @@
declare i32 @llvm.AMDGPU.bfi(i32, i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}bfi_arg_arg_arg:
; SI: V_BFI_B32
; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
@@ -13,7 +13,7 @@ define void @bfi_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %
}
; FUNC-LABEL: {{^}}bfi_arg_arg_imm:
; SI: V_BFI_B32
; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 %src1, i32 123) nounwind readnone
@@ -22,7 +22,7 @@ define void @bfi_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounw
}
; FUNC-LABEL: {{^}}bfi_arg_imm_arg:
; SI: V_BFI_B32
; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 123, i32 %src2) nounwind readnone
@@ -31,7 +31,7 @@ define void @bfi_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounw
}
; FUNC-LABEL: {{^}}bfi_imm_arg_arg:
; SI: V_BFI_B32
; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 123, i32 %src1, i32 %src2) nounwind readnone

View File

@@ -4,7 +4,7 @@
declare i32 @llvm.AMDGPU.bfm(i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}bfm_arg_arg:
; SI: V_BFM
; SI: v_bfm
; EG: BFM_INT
define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 %src1) nounwind readnone
@@ -13,7 +13,7 @@ define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind
}
; FUNC-LABEL: {{^}}bfm_arg_imm:
; SI: V_BFM
; SI: v_bfm
; EG: BFM_INT
define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 123) nounwind readnone
@@ -22,7 +22,7 @@ define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
}
; FUNC-LABEL: {{^}}bfm_imm_arg:
; SI: V_BFM
; SI: v_bfm
; EG: BFM_INT
define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 %src1) nounwind readnone
@@ -31,7 +31,7 @@ define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
}
; FUNC-LABEL: {{^}}bfm_imm_imm:
; SI: V_BFM
; SI: v_bfm
; EG: BFM_INT
define void @bfm_imm_imm(i32 addrspace(1)* %out) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 456) nounwind readnone

View File

@@ -3,11 +3,11 @@
declare i32 @llvm.AMDGPU.brev(i32) nounwind readnone
; FUNC-LABEL: {{^}}s_brev_i32:
; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
; SI: S_BREV_B32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
; SI: S_ENDPGM
; SI: s_load_dword [[VAL:s[0-9]+]],
; SI: s_brev_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
%ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
store i32 %ctlz, i32 addrspace(1)* %out, align 4
@@ -15,10 +15,10 @@ define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
}
; FUNC-LABEL: {{^}}v_brev_i32:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_BFREV_B32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32 addrspace(1)* %valptr, align 4
%ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone

View File

@@ -5,10 +5,10 @@ declare float @llvm.AMDGPU.clamp.f32(float, float, float) nounwind readnone
declare float @llvm.AMDIL.clamp.f32(float, float, float) nounwind readnone
; FUNC-LABEL: {{^}}clamp_0_1_f32:
; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: S_ENDPGM
; SI: s_load_dword [[ARG:s[0-9]+]],
; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
; EG: MOV_SAT
define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
@@ -18,9 +18,9 @@ define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
}
; FUNC-LABEL: {{^}}clamp_0_1_amdil_legacy_f32:
; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
; SI: BUFFER_STORE_DWORD [[RESULT]]
; SI: s_load_dword [[ARG:s[0-9]+]],
; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
; SI: buffer_store_dword [[RESULT]]
define void @clamp_0_1_amdil_legacy_f32(float addrspace(1)* %out, float %src) nounwind {
%clamp = call float @llvm.AMDIL.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone
store float %clamp, float addrspace(1)* %out, align 4

View File

@@ -6,7 +6,7 @@ declare float @llvm.AMDGPU.cvt.f32.ubyte2(i32) nounwind readnone
declare float @llvm.AMDGPU.cvt.f32.ubyte3(i32) nounwind readnone
; SI-LABEL: {{^}}test_unpack_byte0_to_float:
; SI: V_CVT_F32_UBYTE0
; SI: v_cvt_f32_ubyte0
define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte0(i32 %val) nounwind readnone
@@ -15,7 +15,7 @@ define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(
}
; SI-LABEL: {{^}}test_unpack_byte1_to_float:
; SI: V_CVT_F32_UBYTE1
; SI: v_cvt_f32_ubyte1
define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte1(i32 %val) nounwind readnone
@@ -24,7 +24,7 @@ define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(
}
; SI-LABEL: {{^}}test_unpack_byte2_to_float:
; SI: V_CVT_F32_UBYTE2
; SI: v_cvt_f32_ubyte2
define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte2(i32 %val) nounwind readnone
@@ -33,7 +33,7 @@ define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(
}
; SI-LABEL: {{^}}test_unpack_byte3_to_float:
; SI: V_CVT_F32_UBYTE3
; SI: v_cvt_f32_ubyte3
define void @test_unpack_byte3_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte3(i32 %val) nounwind readnone

View File

@@ -4,14 +4,14 @@ declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone
declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone
; SI-LABEL: {{^}}test_div_fixup_f32:
; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]]
; SI-DAG: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]]
; SI: V_DIV_FIXUP_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
; SI: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
%result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
@@ -19,7 +19,7 @@ define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, fl
}
; SI-LABEL: {{^}}test_div_fixup_f64:
; SI: V_DIV_FIXUP_F64
; SI: v_div_fixup_f64
define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
%result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
store double %result, double addrspace(1)* %out, align 8

Some files were not shown because too many files have changed in this diff Show More