mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-11-03 14:21:30 +00:00 
			
		
		
		
	R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218776 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		@@ -99,7 +99,7 @@ void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
 | 
			
		||||
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 | 
			
		||||
  SetupMachineFunction(MF);
 | 
			
		||||
 | 
			
		||||
  OutStreamer.emitRawComment(Twine('@') + MF.getName() + Twine(':'));
 | 
			
		||||
  EmitFunctionHeader();
 | 
			
		||||
 | 
			
		||||
  MCContext &Context = getObjFileLowering().getContext();
 | 
			
		||||
  const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
 | 
			
		||||
 
 | 
			
		||||
@@ -1,12 +1,12 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 | 
			
		||||
 | 
			
		||||
; R600-CHECK: @v4i32_kernel_arg
 | 
			
		||||
; R600-CHECK: {{^}}v4i32_kernel_arg:
 | 
			
		||||
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
 | 
			
		||||
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
 | 
			
		||||
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
 | 
			
		||||
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
 | 
			
		||||
; SI-CHECK: @v4i32_kernel_arg
 | 
			
		||||
; SI-CHECK: {{^}}v4i32_kernel_arg:
 | 
			
		||||
; SI-CHECK: BUFFER_STORE_DWORDX4
 | 
			
		||||
define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32>  %in) {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -14,14 +14,14 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; R600-CHECK: @v4f32_kernel_arg
 | 
			
		||||
; R600-CHECK: {{^}}v4f32_kernel_arg:
 | 
			
		||||
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
 | 
			
		||||
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
 | 
			
		||||
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
 | 
			
		||||
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
 | 
			
		||||
; SI-CHECK: @v4f32_kernel_arg
 | 
			
		||||
; SI-CHECK: {{^}}v4f32_kernel_arg:
 | 
			
		||||
; SI-CHECK: BUFFER_STORE_DWORDX4
 | 
			
		||||
define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float>  %in) {
 | 
			
		||||
define void @v4f32_kernel_arg(<4 x float> addrspace(1)* %out, <4 x float>  %in) {
 | 
			
		||||
entry:
 | 
			
		||||
  store <4 x float> %in, <4 x float> addrspace(1)* %out
 | 
			
		||||
  ret void
 | 
			
		||||
 
 | 
			
		||||
@@ -10,7 +10,7 @@
 | 
			
		||||
; Instructions with B32, U32, and I32 in their name take 32-bit operands, while
 | 
			
		||||
; instructions with B64, U64, and I64 take 64-bit operands.
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @local_address_load
 | 
			
		||||
; FUNC-LABEL: {{^}}local_address_load:
 | 
			
		||||
; CHECK: V_MOV_B32_e{{32|64}} [[PTR:v[0-9]]]
 | 
			
		||||
; CHECK: DS_READ_B32 v{{[0-9]+}}, [[PTR]]
 | 
			
		||||
define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 | 
			
		||||
@@ -20,7 +20,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @local_address_gep
 | 
			
		||||
; FUNC-LABEL: {{^}}local_address_gep:
 | 
			
		||||
; CHECK: S_ADD_I32 [[SPTR:s[0-9]]]
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
 | 
			
		||||
; CHECK: DS_READ_B32 [[VPTR]]
 | 
			
		||||
@@ -32,7 +32,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @local_address_gep_const_offset
 | 
			
		||||
; FUNC-LABEL: {{^}}local_address_gep_const_offset:
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
 | 
			
		||||
; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 0x4,
 | 
			
		||||
define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 | 
			
		||||
@@ -44,7 +44,7 @@ entry:
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; Offset too large, can't fold into 16-bit immediate offset.
 | 
			
		||||
; FUNC-LABEL: @local_address_gep_large_const_offset
 | 
			
		||||
; FUNC-LABEL: {{^}}local_address_gep_large_const_offset:
 | 
			
		||||
; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
 | 
			
		||||
; CHECK: DS_READ_B32 [[VPTR]]
 | 
			
		||||
@@ -56,7 +56,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @null_32bit_lds_ptr:
 | 
			
		||||
; FUNC-LABEL: {{^}}null_32bit_lds_ptr:
 | 
			
		||||
; CHECK: V_CMP_NE_I32
 | 
			
		||||
; CHECK-NOT: V_CMP_NE_I32
 | 
			
		||||
; CHECK: V_CNDMASK_B32
 | 
			
		||||
@@ -67,7 +67,7 @@ define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds)
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @mul_32bit_ptr:
 | 
			
		||||
; FUNC-LABEL: {{^}}mul_32bit_ptr:
 | 
			
		||||
; CHECK: V_MUL_LO_I32
 | 
			
		||||
; CHECK-NEXT: V_ADD_I32_e32
 | 
			
		||||
; CHECK-NEXT: DS_READ_B32
 | 
			
		||||
@@ -80,7 +80,7 @@ define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %
 | 
			
		||||
 | 
			
		||||
@g_lds = addrspace(3) global float zeroinitializer, align 4
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @infer_ptr_alignment_global_offset:
 | 
			
		||||
; FUNC-LABEL: {{^}}infer_ptr_alignment_global_offset:
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
 | 
			
		||||
; CHECK: DS_READ_B32 v{{[0-9]+}}, [[REG]]
 | 
			
		||||
define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) {
 | 
			
		||||
@@ -93,21 +93,21 @@ define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %ti
 | 
			
		||||
@ptr = addrspace(3) global i32 addrspace(3)* null
 | 
			
		||||
@dst = addrspace(3) global [16384 x i32] zeroinitializer
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @global_ptr:
 | 
			
		||||
; FUNC-LABEL: {{^}}global_ptr:
 | 
			
		||||
; CHECK: DS_WRITE_B32
 | 
			
		||||
define void @global_ptr() nounwind {
 | 
			
		||||
  store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @local_address_store
 | 
			
		||||
; FUNC-LABEL: {{^}}local_address_store:
 | 
			
		||||
; CHECK: DS_WRITE_B32
 | 
			
		||||
define void @local_address_store(i32 addrspace(3)* %out, i32 %val) {
 | 
			
		||||
  store i32 %val, i32 addrspace(3)* %out
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @local_address_gep_store
 | 
			
		||||
; FUNC-LABEL: {{^}}local_address_gep_store:
 | 
			
		||||
; CHECK: S_ADD_I32 [[SADDR:s[0-9]+]],
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
 | 
			
		||||
; CHECK: DS_WRITE_B32 [[ADDR]], v{{[0-9]+}},
 | 
			
		||||
@@ -117,7 +117,7 @@ define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @local_address_gep_const_offset_store
 | 
			
		||||
; FUNC-LABEL: {{^}}local_address_gep_const_offset_store:
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
 | 
			
		||||
; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 0x4
 | 
			
		||||
@@ -128,7 +128,7 @@ define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %v
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; Offset too large, can't fold into 16-bit immediate offset.
 | 
			
		||||
; FUNC-LABEL: @local_address_gep_large_const_offset_store
 | 
			
		||||
; FUNC-LABEL: {{^}}local_address_gep_large_const_offset_store:
 | 
			
		||||
; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
 | 
			
		||||
; CHECK: DS_WRITE_B32 [[VPTR]], v{{[0-9]+}}, 0
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 | 
			
		||||
 | 
			
		||||
; SI-CHECK: @f64_kernel_arg
 | 
			
		||||
; SI-CHECK: {{^}}f64_kernel_arg:
 | 
			
		||||
; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
 | 
			
		||||
; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
 | 
			
		||||
; SI-CHECK: BUFFER_STORE_DWORDX2
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK --check-prefix=FUNC %s
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
;FUNC-LABEL: @test1:
 | 
			
		||||
;FUNC-LABEL: {{^}}test1:
 | 
			
		||||
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
 | 
			
		||||
;SI-CHECK: V_ADD_I32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
 | 
			
		||||
@@ -16,7 +16,7 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
;FUNC-LABEL: @test2:
 | 
			
		||||
;FUNC-LABEL: {{^}}test2:
 | 
			
		||||
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
 | 
			
		||||
@@ -32,7 +32,7 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
;FUNC-LABEL: @test4:
 | 
			
		||||
;FUNC-LABEL: {{^}}test4:
 | 
			
		||||
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
@@ -52,7 +52,7 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test8
 | 
			
		||||
; FUNC-LABEL: {{^}}test8:
 | 
			
		||||
; EG-CHECK: ADD_INT
 | 
			
		||||
; EG-CHECK: ADD_INT
 | 
			
		||||
; EG-CHECK: ADD_INT
 | 
			
		||||
@@ -76,7 +76,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test16
 | 
			
		||||
; FUNC-LABEL: {{^}}test16:
 | 
			
		||||
; EG-CHECK: ADD_INT
 | 
			
		||||
; EG-CHECK: ADD_INT
 | 
			
		||||
; EG-CHECK: ADD_INT
 | 
			
		||||
@@ -116,7 +116,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @add64
 | 
			
		||||
; FUNC-LABEL: {{^}}add64:
 | 
			
		||||
; SI-CHECK: S_ADD_U32
 | 
			
		||||
; SI-CHECK: S_ADDC_U32
 | 
			
		||||
define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
 | 
			
		||||
@@ -131,7 +131,7 @@ entry:
 | 
			
		||||
; %0 will be stored in a VGPR, so the compiler will be forced to copy %a
 | 
			
		||||
; to a VGPR before doing the add.
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @add64_sgpr_vgpr
 | 
			
		||||
; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
 | 
			
		||||
; SI-CHECK-NOT: V_ADDC_U32_e32 s
 | 
			
		||||
define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -142,7 +142,7 @@ entry:
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; Test i64 add inside a branch.
 | 
			
		||||
; FUNC-LABEL: @add64_in_branch
 | 
			
		||||
; FUNC-LABEL: {{^}}add64_in_branch:
 | 
			
		||||
; SI-CHECK: S_ADD_U32
 | 
			
		||||
; SI-CHECK: S_ADDC_U32
 | 
			
		||||
define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
 | 
			
		||||
 
 | 
			
		||||
@@ -3,7 +3,7 @@
 | 
			
		||||
 | 
			
		||||
declare i32 @llvm.r600.read.tidig.x() readnone
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_i64_vreg:
 | 
			
		||||
; SI-LABEL: {{^}}test_i64_vreg:
 | 
			
		||||
; SI: V_ADD_I32
 | 
			
		||||
; SI: V_ADDC_U32
 | 
			
		||||
define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
 | 
			
		||||
@@ -18,7 +18,7 @@ define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; Check that the SGPR add operand is correctly moved to a VGPR.
 | 
			
		||||
; SI-LABEL: @sgpr_operand:
 | 
			
		||||
; SI-LABEL: {{^}}sgpr_operand:
 | 
			
		||||
; SI: V_ADD_I32
 | 
			
		||||
; SI: V_ADDC_U32
 | 
			
		||||
define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
 | 
			
		||||
@@ -31,7 +31,7 @@ define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noal
 | 
			
		||||
; Swap the arguments. Check that the SGPR -> VGPR copy works with the
 | 
			
		||||
; SGPR as other operand.
 | 
			
		||||
;
 | 
			
		||||
; SI-LABEL: @sgpr_operand_reversed:
 | 
			
		||||
; SI-LABEL: {{^}}sgpr_operand_reversed:
 | 
			
		||||
; SI: V_ADD_I32
 | 
			
		||||
; SI: V_ADDC_U32
 | 
			
		||||
define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
 | 
			
		||||
@@ -42,7 +42,7 @@ define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_v2i64_sreg:
 | 
			
		||||
; SI-LABEL: {{^}}test_v2i64_sreg:
 | 
			
		||||
; SI: S_ADD_U32
 | 
			
		||||
; SI: S_ADDC_U32
 | 
			
		||||
; SI: S_ADD_U32
 | 
			
		||||
@@ -53,7 +53,7 @@ define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a,
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_v2i64_vreg:
 | 
			
		||||
; SI-LABEL: {{^}}test_v2i64_vreg:
 | 
			
		||||
; SI: V_ADD_I32
 | 
			
		||||
; SI: V_ADDC_U32
 | 
			
		||||
; SI: V_ADD_I32
 | 
			
		||||
@@ -69,7 +69,7 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @trunc_i64_add_to_i32
 | 
			
		||||
; SI-LABEL: {{^}}trunc_i64_add_to_i32:
 | 
			
		||||
; SI: S_LOAD_DWORD s[[SREG0:[0-9]+]]
 | 
			
		||||
; SI: S_LOAD_DWORD s[[SREG1:[0-9]+]]
 | 
			
		||||
; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,7 @@
 | 
			
		||||
; FIXME: Extra V_MOV from SGPR to VGPR for second read. The address is
 | 
			
		||||
; already in a VGPR after the first read.
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @do_as_ptr_calcs:
 | 
			
		||||
; CHECK-LABEL: {{^}}do_as_ptr_calcs:
 | 
			
		||||
; CHECK: S_LOAD_DWORD [[SREG1:s[0-9]+]],
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
 | 
			
		||||
; CHECK-DAG: DS_READ_B32 v{{[0-9]+}}, [[VREG1]], 0xc
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test2
 | 
			
		||||
; FUNC-LABEL: {{^}}test2:
 | 
			
		||||
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
 | 
			
		||||
@@ -17,7 +17,7 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test4
 | 
			
		||||
; FUNC-LABEL: {{^}}test4:
 | 
			
		||||
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
@@ -37,7 +37,7 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @s_and_i32
 | 
			
		||||
; FUNC-LABEL: {{^}}s_and_i32:
 | 
			
		||||
; SI: S_AND_B32
 | 
			
		||||
define void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 | 
			
		||||
  %and = and i32 %a, %b
 | 
			
		||||
@@ -45,7 +45,7 @@ define void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @s_and_constant_i32
 | 
			
		||||
; FUNC-LABEL: {{^}}s_and_constant_i32:
 | 
			
		||||
; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
 | 
			
		||||
define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
 | 
			
		||||
  %and = and i32 %a, 1234567
 | 
			
		||||
@@ -53,7 +53,7 @@ define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_and_i32
 | 
			
		||||
; FUNC-LABEL: {{^}}v_and_i32:
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
 | 
			
		||||
  %a = load i32 addrspace(1)* %aptr, align 4
 | 
			
		||||
@@ -63,7 +63,7 @@ define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addr
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_and_constant_i32
 | 
			
		||||
; FUNC-LABEL: {{^}}v_and_constant_i32:
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
 | 
			
		||||
  %a = load i32 addrspace(1)* %aptr, align 4
 | 
			
		||||
@@ -72,7 +72,7 @@ define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr)
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @s_and_i64
 | 
			
		||||
; FUNC-LABEL: {{^}}s_and_i64:
 | 
			
		||||
; SI: S_AND_B64
 | 
			
		||||
define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
 | 
			
		||||
  %and = and i64 %a, %b
 | 
			
		||||
@@ -81,7 +81,7 @@ define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FIXME: Should use SGPRs
 | 
			
		||||
; FUNC-LABEL: @s_and_i1
 | 
			
		||||
; FUNC-LABEL: {{^}}s_and_i1:
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
 | 
			
		||||
  %and = and i1 %a, %b
 | 
			
		||||
@@ -89,7 +89,7 @@ define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @s_and_constant_i64
 | 
			
		||||
; FUNC-LABEL: {{^}}s_and_constant_i64:
 | 
			
		||||
; SI: S_AND_B64
 | 
			
		||||
define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
 | 
			
		||||
  %and = and i64 %a, 281474976710655
 | 
			
		||||
@@ -97,7 +97,7 @@ define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_and_i64
 | 
			
		||||
; FUNC-LABEL: {{^}}v_and_i64:
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
 | 
			
		||||
@@ -108,7 +108,7 @@ define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addr
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_and_i64_br
 | 
			
		||||
; FUNC-LABEL: {{^}}v_and_i64_br:
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
define void @v_and_i64_br(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i32 %cond) {
 | 
			
		||||
@@ -128,7 +128,7 @@ endif:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_and_constant_i64
 | 
			
		||||
; FUNC-LABEL: {{^}}v_and_constant_i64:
 | 
			
		||||
; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 | 
			
		||||
; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 | 
			
		||||
define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
 | 
			
		||||
@@ -139,7 +139,7 @@ define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FIXME: Replace and 0 with mov 0
 | 
			
		||||
; FUNC-LABEL: @v_and_inline_imm_i64
 | 
			
		||||
; FUNC-LABEL: {{^}}v_and_inline_imm_i64:
 | 
			
		||||
; SI: V_AND_B32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
 | 
			
		||||
; SI: V_AND_B32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
 | 
			
		||||
define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
 | 
			
		||||
@@ -149,7 +149,7 @@ define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %apt
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @s_and_inline_imm_i64
 | 
			
		||||
; FUNC-LABEL: {{^}}s_and_inline_imm_i64:
 | 
			
		||||
; SI: S_AND_B64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
 | 
			
		||||
define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 | 
			
		||||
  %and = and i64 %a, 64
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @anyext_i1_i32
 | 
			
		||||
; CHECK-LABEL: {{^}}anyext_i1_i32:
 | 
			
		||||
; CHECK: V_CNDMASK_B32_e64
 | 
			
		||||
define void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) {
 | 
			
		||||
entry:
 | 
			
		||||
 
 | 
			
		||||
@@ -9,7 +9,7 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
 | 
			
		||||
; 64-bit pointer add. This should work since private pointers should
 | 
			
		||||
; be 32-bits.
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_private_array_ptr_calc:
 | 
			
		||||
; SI-LABEL: {{^}}test_private_array_ptr_calc:
 | 
			
		||||
 | 
			
		||||
; FIXME: We end up with zero argument for ADD, because
 | 
			
		||||
; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@
 | 
			
		||||
 | 
			
		||||
declare i32 @llvm.SI.tid() readnone
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_array_ptr_calc
 | 
			
		||||
; SI-LABEL: {{^}}test_array_ptr_calc:
 | 
			
		||||
; SI: V_MUL_LO_I32
 | 
			
		||||
; SI: V_MUL_HI_I32
 | 
			
		||||
define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_offset:
 | 
			
		||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
 | 
			
		||||
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 | 
			
		||||
; SI: S_LOAD_DWORD [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
 | 
			
		||||
; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
 | 
			
		||||
@@ -17,7 +17,7 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i64_offset:
 | 
			
		||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset:
 | 
			
		||||
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 | 
			
		||||
; SI: S_LOAD_DWORDX2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
 | 
			
		||||
; SI: S_MOV_B64  s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
 | 
			
		||||
@@ -37,7 +37,7 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_bad_si_offset
 | 
			
		||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset:
 | 
			
		||||
; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
 | 
			
		||||
; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -51,7 +51,7 @@ define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i3
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @lds_atomic_cmpxchg_noret_i32_offset:
 | 
			
		||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset:
 | 
			
		||||
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
 | 
			
		||||
; SI: S_LOAD_DWORD [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
 | 
			
		||||
; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
 | 
			
		||||
@@ -66,7 +66,7 @@ define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %sw
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @lds_atomic_cmpxchg_noret_i64_offset:
 | 
			
		||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset:
 | 
			
		||||
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
 | 
			
		||||
; SI: S_LOAD_DWORDX2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
 | 
			
		||||
; SI: S_MOV_B64  s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @atomic_add_local
 | 
			
		||||
; FUNC-LABEL: {{^}}atomic_add_local:
 | 
			
		||||
; R600: LDS_ADD *
 | 
			
		||||
; SI: DS_ADD_U32
 | 
			
		||||
define void @atomic_add_local(i32 addrspace(3)* %local) {
 | 
			
		||||
@@ -9,7 +9,7 @@ define void @atomic_add_local(i32 addrspace(3)* %local) {
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @atomic_add_local_const_offset
 | 
			
		||||
; FUNC-LABEL: {{^}}atomic_add_local_const_offset:
 | 
			
		||||
; R600: LDS_ADD *
 | 
			
		||||
; SI: DS_ADD_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
 | 
			
		||||
define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
 | 
			
		||||
@@ -18,7 +18,7 @@ define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @atomic_add_ret_local
 | 
			
		||||
; FUNC-LABEL: {{^}}atomic_add_ret_local:
 | 
			
		||||
; R600: LDS_ADD_RET *
 | 
			
		||||
; SI: DS_ADD_RTN_U32
 | 
			
		||||
define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
 | 
			
		||||
@@ -27,7 +27,7 @@ define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %loc
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @atomic_add_ret_local_const_offset
 | 
			
		||||
; FUNC-LABEL: {{^}}atomic_add_ret_local_const_offset:
 | 
			
		||||
; R600: LDS_ADD_RET *
 | 
			
		||||
; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
 | 
			
		||||
define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @atomic_sub_local
 | 
			
		||||
; FUNC-LABEL: {{^}}atomic_sub_local:
 | 
			
		||||
; R600: LDS_SUB *
 | 
			
		||||
; SI: DS_SUB_U32
 | 
			
		||||
define void @atomic_sub_local(i32 addrspace(3)* %local) {
 | 
			
		||||
@@ -9,7 +9,7 @@ define void @atomic_sub_local(i32 addrspace(3)* %local) {
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @atomic_sub_local_const_offset
 | 
			
		||||
; FUNC-LABEL: {{^}}atomic_sub_local_const_offset:
 | 
			
		||||
; R600: LDS_SUB *
 | 
			
		||||
; SI: DS_SUB_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10
 | 
			
		||||
define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
 | 
			
		||||
@@ -18,7 +18,7 @@ define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @atomic_sub_ret_local
 | 
			
		||||
; FUNC-LABEL: {{^}}atomic_sub_ret_local:
 | 
			
		||||
; R600: LDS_SUB_RET *
 | 
			
		||||
; SI: DS_SUB_RTN_U32
 | 
			
		||||
define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
 | 
			
		||||
@@ -27,7 +27,7 @@ define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %loc
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @atomic_sub_ret_local_const_offset
 | 
			
		||||
; FUNC-LABEL: {{^}}atomic_sub_ret_local_const_offset:
 | 
			
		||||
; R600: LDS_SUB_RET *
 | 
			
		||||
; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
 | 
			
		||||
define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; XFAIL: *
 | 
			
		||||
; RUN: llc -O0 -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @test_branch(
 | 
			
		||||
; CHECK-LABEL: {{^}}test_branch(
 | 
			
		||||
define void @test_branch(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind {
 | 
			
		||||
  %cmp = icmp ne i32 %val, 0
 | 
			
		||||
  br i1 %cmp, label %store, label %end
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; XFAIL: *
 | 
			
		||||
; RUN: llc -O0 -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @test_loop:
 | 
			
		||||
; CHECK-LABEL: {{^}}test_loop:
 | 
			
		||||
define void @test_loop(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
  br label %loop.body
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@
 | 
			
		||||
 | 
			
		||||
; XFAIL: *
 | 
			
		||||
 | 
			
		||||
; CHECK: @bfe_def
 | 
			
		||||
; CHECK: {{^}}bfe_def:
 | 
			
		||||
; CHECK: BFE_UINT
 | 
			
		||||
define void @bfe_def(i32 addrspace(1)* %out, i32 %x) {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -17,7 +17,7 @@ entry:
 | 
			
		||||
; implemented with a LSHR instruction, which is better, because LSHR has fewer
 | 
			
		||||
; operands and requires fewer constants.
 | 
			
		||||
 | 
			
		||||
; CHECK: @bfe_shift
 | 
			
		||||
; CHECK: {{^}}bfe_shift:
 | 
			
		||||
; CHECK-NOT: BFE_UINT
 | 
			
		||||
define void @bfe_shift(i32 addrspace(1)* %out, i32 %x) {
 | 
			
		||||
entry:
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@
 | 
			
		||||
; BFI_INT Definition pattern from ISA docs
 | 
			
		||||
; (y & x) | (z & ~x)
 | 
			
		||||
;
 | 
			
		||||
; R600-CHECK: @bfi_def
 | 
			
		||||
; R600-CHECK: {{^}}bfi_def:
 | 
			
		||||
; R600-CHECK: BFI_INT
 | 
			
		||||
; SI-CHECK:   @bfi_def
 | 
			
		||||
; SI-CHECK:   V_BFI_B32
 | 
			
		||||
@@ -20,7 +20,7 @@ entry:
 | 
			
		||||
 | 
			
		||||
; SHA-256 Ch function
 | 
			
		||||
; z ^ (x & (y ^ z))
 | 
			
		||||
; R600-CHECK: @bfi_sha256_ch
 | 
			
		||||
; R600-CHECK: {{^}}bfi_sha256_ch:
 | 
			
		||||
; R600-CHECK: BFI_INT
 | 
			
		||||
; SI-CHECK:   @bfi_sha256_ch
 | 
			
		||||
; SI-CHECK:   V_BFI_B32
 | 
			
		||||
@@ -35,7 +35,7 @@ entry:
 | 
			
		||||
 | 
			
		||||
; SHA-256 Ma function
 | 
			
		||||
; ((x & z) | (y & (x | z)))
 | 
			
		||||
; R600-CHECK: @bfi_sha256_ma
 | 
			
		||||
; R600-CHECK: {{^}}bfi_sha256_ma:
 | 
			
		||||
; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
 | 
			
		||||
; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
 | 
			
		||||
; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@
 | 
			
		||||
 | 
			
		||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v32i8_to_v8i32
 | 
			
		||||
; FUNC-LABEL: {{^}}v32i8_to_v8i32:
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -17,7 +17,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @i8ptr_v16i8ptr
 | 
			
		||||
; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -55,7 +55,7 @@ define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nou
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @bitcast_v2i32_to_f64
 | 
			
		||||
; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
 | 
			
		||||
  %val = load <2 x i32> addrspace(1)* %in, align 8
 | 
			
		||||
@@ -65,7 +65,7 @@ define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @bitcast_f64_to_v2i32
 | 
			
		||||
; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
 | 
			
		||||
  %val = load double addrspace(1)* %in, align 8
 | 
			
		||||
 
 | 
			
		||||
@@ -1,11 +1,11 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 | 
			
		||||
 | 
			
		||||
; R600-CHECK: @build_vector2
 | 
			
		||||
; R600-CHECK: {{^}}build_vector2:
 | 
			
		||||
; R600-CHECK: MOV
 | 
			
		||||
; R600-CHECK: MOV
 | 
			
		||||
; R600-CHECK-NOT: MOV
 | 
			
		||||
; SI-CHECK: @build_vector2
 | 
			
		||||
; SI-CHECK: {{^}}build_vector2:
 | 
			
		||||
; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]]], 5
 | 
			
		||||
; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]]], 6
 | 
			
		||||
; SI-CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[X]]:[[Y]]{{\]}}
 | 
			
		||||
@@ -15,13 +15,13 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; R600-CHECK: @build_vector4
 | 
			
		||||
; R600-CHECK: {{^}}build_vector4:
 | 
			
		||||
; R600-CHECK: MOV
 | 
			
		||||
; R600-CHECK: MOV
 | 
			
		||||
; R600-CHECK: MOV
 | 
			
		||||
; R600-CHECK: MOV
 | 
			
		||||
; R600-CHECK-NOT: MOV
 | 
			
		||||
; SI-CHECK: @build_vector4
 | 
			
		||||
; SI-CHECK: {{^}}build_vector4:
 | 
			
		||||
; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]]], 5
 | 
			
		||||
; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]]], 6
 | 
			
		||||
; SI-CHECK-DAG: V_MOV_B32_e32 v[[Z:[0-9]]], 7
 | 
			
		||||
 
 | 
			
		||||
@@ -2,10 +2,10 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG-CHECK %s
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600-CHECK %s
 | 
			
		||||
 | 
			
		||||
; EG-CHECK: @call_fs
 | 
			
		||||
; EG-CHECK: {{^}}call_fs:
 | 
			
		||||
; EG-CHECK: .long 257
 | 
			
		||||
; EG-CHECK: CALL_FS  ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84]
 | 
			
		||||
; R600-CHECK: @call_fs
 | 
			
		||||
; R600-CHECK: {{^}}call_fs:
 | 
			
		||||
; R600-CHECK: .long 257
 | 
			
		||||
; R600-CHECK:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89]
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @main
 | 
			
		||||
; CHECK-LABEL: {{^}}main:
 | 
			
		||||
; CHECK: LOOP_START_DX10
 | 
			
		||||
; CHECK: ALU_PUSH_BEFORE
 | 
			
		||||
; CHECK: LOOP_START_DX10
 | 
			
		||||
 
 | 
			
		||||
@@ -17,7 +17,7 @@
 | 
			
		||||
; BUG64-NOT: Applying bug work-around
 | 
			
		||||
; BUG32-NOT: Applying bug work-around
 | 
			
		||||
; NOBUG-NOT: Applying bug work-around
 | 
			
		||||
; FUNC-LABEL: @nested3
 | 
			
		||||
; FUNC-LABEL: {{^}}nested3:
 | 
			
		||||
define void @nested3(i32 addrspace(1)* %out, i32 %cond) {
 | 
			
		||||
entry:
 | 
			
		||||
  %0 = icmp sgt i32 %cond, 0
 | 
			
		||||
@@ -50,7 +50,7 @@ end:
 | 
			
		||||
; BUG64: Applying bug work-around
 | 
			
		||||
; BUG32-NOT: Applying bug work-around
 | 
			
		||||
; NOBUG-NOT: Applying bug work-around
 | 
			
		||||
; FUNC-LABEL: @nested4
 | 
			
		||||
; FUNC-LABEL: {{^}}nested4:
 | 
			
		||||
define void @nested4(i32 addrspace(1)* %out, i32 %cond) {
 | 
			
		||||
entry:
 | 
			
		||||
  %0 = icmp sgt i32 %cond, 0
 | 
			
		||||
@@ -91,7 +91,7 @@ end:
 | 
			
		||||
; BUG64: Applying bug work-around
 | 
			
		||||
; BUG32-NOT: Applying bug work-around
 | 
			
		||||
; NOBUG-NOT: Applying bug work-around
 | 
			
		||||
; FUNC-LABEL: @nested7
 | 
			
		||||
; FUNC-LABEL: {{^}}nested7:
 | 
			
		||||
define void @nested7(i32 addrspace(1)* %out, i32 %cond) {
 | 
			
		||||
entry:
 | 
			
		||||
  %0 = icmp sgt i32 %cond, 0
 | 
			
		||||
@@ -156,7 +156,7 @@ end:
 | 
			
		||||
; BUG64: Applying bug work-around
 | 
			
		||||
; BUG32: Applying bug work-around
 | 
			
		||||
; NOBUG-NOT: Applying bug work-around
 | 
			
		||||
; FUNC-LABEL: @nested8
 | 
			
		||||
; FUNC-LABEL: {{^}}nested8:
 | 
			
		||||
define void @nested8(i32 addrspace(1)* %out, i32 %cond) {
 | 
			
		||||
entry:
 | 
			
		||||
  %0 = icmp sgt i32 %cond, 0
 | 
			
		||||
 
 | 
			
		||||
@@ -1,12 +1,13 @@
 | 
			
		||||
; RUN: opt -codegenprepare -S -o - %s | FileCheck --check-prefix=OPT --check-prefix=FUNC %s
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-LLC --check-prefix=FUNC %s
 | 
			
		||||
; RUN: opt -codegenprepare -S -o - %s | FileCheck --check-prefix=OPT %s
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-LLC %s
 | 
			
		||||
 | 
			
		||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
 | 
			
		||||
target triple = "r600--"
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test
 | 
			
		||||
; OPT-LABEL: @test
 | 
			
		||||
; OPT: mul nsw i32
 | 
			
		||||
; OPT-NEXT: sext
 | 
			
		||||
; SI-LLC-LABEL: {{^}}test:
 | 
			
		||||
; SI-LLC: S_MUL_I32
 | 
			
		||||
; SI-LLC-NOT: MUL
 | 
			
		||||
define void @test(i8 addrspace(1)* nocapture readonly %in, i32 %a, i8 %b) {
 | 
			
		||||
 
 | 
			
		||||
@@ -9,7 +9,7 @@
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; 128-bit loads instead of many 8-bit
 | 
			
		||||
; EG-LABEL: @combine_vloads:
 | 
			
		||||
; EG-LABEL: {{^}}combine_vloads:
 | 
			
		||||
; EG: VTX_READ_128
 | 
			
		||||
; EG: VTX_READ_128
 | 
			
		||||
define void @combine_vloads(<8 x i8> addrspace(1)* nocapture %src, <8 x i8> addrspace(1)* nocapture %result) nounwind {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; CHECK: @main
 | 
			
		||||
; CHECK: {{^}}main:
 | 
			
		||||
; CHECK-NOT: MOV
 | 
			
		||||
define void @main(<4 x float> inreg %reg0) #0 {
 | 
			
		||||
entry:
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v1i32
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v1i32:
 | 
			
		||||
; 0x80f000 is the high 32 bits of the resource descriptor used by MUBUF
 | 
			
		||||
; instructions that access scratch memory.  Bit 23, which is the add_tid_enable
 | 
			
		||||
; bit, is only set for scratch access, so we can check for the absence of this
 | 
			
		||||
@@ -13,7 +13,7 @@ define void @test_concat_v1i32(<2 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v2i32
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v2i32:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v2i32(<4 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
 | 
			
		||||
@@ -22,7 +22,7 @@ define void @test_concat_v2i32(<4 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v4i32
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v4i32:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v4i32(<8 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
 | 
			
		||||
@@ -31,7 +31,7 @@ define void @test_concat_v4i32(<8 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v8i32
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v8i32:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v8i32(<16 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind {
 | 
			
		||||
@@ -40,7 +40,7 @@ define void @test_concat_v8i32(<16 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v16i32
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v16i32:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v16i32(<32 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) nounwind {
 | 
			
		||||
@@ -49,7 +49,7 @@ define void @test_concat_v16i32(<32 x i32> addrspace(1)* %out, <16 x i32> %a, <1
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v1f32
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v1f32:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v1f32(<2 x float> addrspace(1)* %out, <1 x float> %a, <1 x float> %b) nounwind {
 | 
			
		||||
@@ -58,7 +58,7 @@ define void @test_concat_v1f32(<2 x float> addrspace(1)* %out, <1 x float> %a, <
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v2f32
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v2f32:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v2f32(<4 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
 | 
			
		||||
@@ -67,7 +67,7 @@ define void @test_concat_v2f32(<4 x float> addrspace(1)* %out, <2 x float> %a, <
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v4f32
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v4f32:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v4f32(<8 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
 | 
			
		||||
@@ -76,7 +76,7 @@ define void @test_concat_v4f32(<8 x float> addrspace(1)* %out, <4 x float> %a, <
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v8f32
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v8f32:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v8f32(<16 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
 | 
			
		||||
@@ -85,7 +85,7 @@ define void @test_concat_v8f32(<16 x float> addrspace(1)* %out, <8 x float> %a,
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v16f32
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v16f32:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v16f32(<32 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
 | 
			
		||||
@@ -94,7 +94,7 @@ define void @test_concat_v16f32(<32 x float> addrspace(1)* %out, <16 x float> %a
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v1i64
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v1i64:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v1i64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
 | 
			
		||||
@@ -103,7 +103,7 @@ define void @test_concat_v1i64(<2 x double> addrspace(1)* %out, <1 x double> %a,
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v2i64
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v2i64:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v2i64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
 | 
			
		||||
@@ -112,7 +112,7 @@ define void @test_concat_v2i64(<4 x double> addrspace(1)* %out, <2 x double> %a,
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v4i64
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v4i64:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v4i64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
 | 
			
		||||
@@ -121,7 +121,7 @@ define void @test_concat_v4i64(<8 x double> addrspace(1)* %out, <4 x double> %a,
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v8i64
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v8i64:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v8i64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
 | 
			
		||||
@@ -130,7 +130,7 @@ define void @test_concat_v8i64(<16 x double> addrspace(1)* %out, <8 x double> %a
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v16i64
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v16i64:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v16i64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
 | 
			
		||||
@@ -139,7 +139,7 @@ define void @test_concat_v16i64(<32 x double> addrspace(1)* %out, <16 x double>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v1f64
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v1f64:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v1f64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
 | 
			
		||||
@@ -148,7 +148,7 @@ define void @test_concat_v1f64(<2 x double> addrspace(1)* %out, <1 x double> %a,
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v2f64
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v2f64:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v2f64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
 | 
			
		||||
@@ -157,7 +157,7 @@ define void @test_concat_v2f64(<4 x double> addrspace(1)* %out, <2 x double> %a,
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v4f64
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v4f64:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v4f64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
 | 
			
		||||
@@ -166,7 +166,7 @@ define void @test_concat_v4f64(<8 x double> addrspace(1)* %out, <4 x double> %a,
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v8f64
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v8f64:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v8f64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
 | 
			
		||||
@@ -175,7 +175,7 @@ define void @test_concat_v8f64(<16 x double> addrspace(1)* %out, <8 x double> %a
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v16f64
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v16f64:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v16f64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
 | 
			
		||||
@@ -184,7 +184,7 @@ define void @test_concat_v16f64(<32 x double> addrspace(1)* %out, <16 x double>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v1i1
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v1i1:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v1i1(<2 x i1> addrspace(1)* %out, <1 x i1> %a, <1 x i1> %b) nounwind {
 | 
			
		||||
@@ -193,7 +193,7 @@ define void @test_concat_v1i1(<2 x i1> addrspace(1)* %out, <1 x i1> %a, <1 x i1>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v2i1
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v2i1:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v2i1(<4 x i1> addrspace(1)* %out, <2 x i1> %a, <2 x i1> %b) nounwind {
 | 
			
		||||
@@ -202,7 +202,7 @@ define void @test_concat_v2i1(<4 x i1> addrspace(1)* %out, <2 x i1> %a, <2 x i1>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v4i1
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v4i1:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v4i1(<8 x i1> addrspace(1)* %out, <4 x i1> %a, <4 x i1> %b) nounwind {
 | 
			
		||||
@@ -211,7 +211,7 @@ define void @test_concat_v4i1(<8 x i1> addrspace(1)* %out, <4 x i1> %a, <4 x i1>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v8i1
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v8i1:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v8i1(<16 x i1> addrspace(1)* %out, <8 x i1> %a, <8 x i1> %b) nounwind {
 | 
			
		||||
@@ -220,7 +220,7 @@ define void @test_concat_v8i1(<16 x i1> addrspace(1)* %out, <8 x i1> %a, <8 x i1
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v16i1
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v16i1:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v16i1(<32 x i1> addrspace(1)* %out, <16 x i1> %a, <16 x i1> %b) nounwind {
 | 
			
		||||
@@ -229,7 +229,7 @@ define void @test_concat_v16i1(<32 x i1> addrspace(1)* %out, <16 x i1> %a, <16 x
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v32i1
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v32i1:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v32i1(<64 x i1> addrspace(1)* %out, <32 x i1> %a, <32 x i1> %b) nounwind {
 | 
			
		||||
@@ -238,7 +238,7 @@ define void @test_concat_v32i1(<64 x i1> addrspace(1)* %out, <32 x i1> %a, <32 x
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v1i16
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v1i16:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v1i16(<2 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind {
 | 
			
		||||
@@ -247,7 +247,7 @@ define void @test_concat_v1i16(<2 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v2i16
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v2i16:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v2i16(<4 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) nounwind {
 | 
			
		||||
@@ -256,7 +256,7 @@ define void @test_concat_v2i16(<4 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v4i16
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v4i16:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v4i16(<8 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind {
 | 
			
		||||
@@ -265,7 +265,7 @@ define void @test_concat_v4i16(<8 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v8i16
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v8i16:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v8i16(<16 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind {
 | 
			
		||||
@@ -274,7 +274,7 @@ define void @test_concat_v8i16(<16 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_concat_v16i16
 | 
			
		||||
; FUNC-LABEL: {{^}}test_concat_v16i16:
 | 
			
		||||
; SI-NOT: S_MOV_B32 s{{[0-9]}}, 0x80f000
 | 
			
		||||
; SI-NOT: MOVREL
 | 
			
		||||
define void @test_concat_v16i16(<32 x i16> addrspace(1)* %out, <16 x i16> %a, <16 x i16> %b) nounwind {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_copy_v4i8
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copy_v4i8:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -10,7 +10,7 @@ define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)*
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_copy_v4i8_x2
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copy_v4i8_x2:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
@@ -22,7 +22,7 @@ define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_copy_v4i8_x3
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copy_v4i8_x3:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
@@ -36,7 +36,7 @@ define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_copy_v4i8_x4
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copy_v4i8_x4:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
@@ -52,7 +52,7 @@ define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_copy_v4i8_extra_use
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copy_v4i8_extra_use:
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
@@ -88,7 +88,7 @@ define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> add
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_copy_v4i8_x2_extra_use
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copy_v4i8_x2_extra_use:
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
@@ -127,7 +127,7 @@ define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_copy_v3i8
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copy_v3i8:
 | 
			
		||||
; SI-NOT: BFE
 | 
			
		||||
; SI-NOT: BFI
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -137,7 +137,7 @@ define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)*
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_copy_v4i8_volatile_load
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_load:
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
@@ -149,7 +149,7 @@ define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_copy_v4i8_volatile_store
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_store:
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@ declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
 | 
			
		||||
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
 | 
			
		||||
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @s_ctlz_zero_undef_i32:
 | 
			
		||||
; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i32:
 | 
			
		||||
; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
 | 
			
		||||
; SI: S_FLBIT_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
 | 
			
		||||
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
 | 
			
		||||
@@ -19,7 +19,7 @@ define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctlz_zero_undef_i32:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
 | 
			
		||||
; SI: V_FFBH_U32_e32 [[RESULT:v[0-9]+]], [[VAL]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[RESULT]],
 | 
			
		||||
@@ -33,7 +33,7 @@ define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctlz_zero_undef_v2i32:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v2i32:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORDX2
 | 
			
		||||
; SI: V_FFBH_U32_e32
 | 
			
		||||
; SI: V_FFBH_U32_e32
 | 
			
		||||
@@ -49,7 +49,7 @@ define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctlz_zero_undef_v4i32:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v4i32:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORDX4
 | 
			
		||||
; SI: V_FFBH_U32_e32
 | 
			
		||||
; SI: V_FFBH_U32_e32
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,7 @@ declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
 | 
			
		||||
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
 | 
			
		||||
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @s_ctpop_i32:
 | 
			
		||||
; FUNC-LABEL: {{^}}s_ctpop_i32:
 | 
			
		||||
; SI: S_LOAD_DWORD [[SVAL:s[0-9]+]],
 | 
			
		||||
; SI: S_BCNT1_I32_B32 [[SRESULT:s[0-9]+]], [[SVAL]]
 | 
			
		||||
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
 | 
			
		||||
@@ -22,7 +22,7 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; XXX - Why 0 in register?
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_i32:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_i32:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
 | 
			
		||||
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
 | 
			
		||||
@@ -37,7 +37,7 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_add_chain_i32
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
 | 
			
		||||
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
 | 
			
		||||
@@ -58,7 +58,7 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_add_sgpr_i32
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
 | 
			
		||||
; SI-NEXT: S_WAITCNT
 | 
			
		||||
; SI-NEXT: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
 | 
			
		||||
@@ -72,7 +72,7 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_v2i32:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_v2i32:
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -86,7 +86,7 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_v4i32:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_v4i32:
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32
 | 
			
		||||
@@ -104,7 +104,7 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_v8i32:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_v8i32:
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32
 | 
			
		||||
@@ -130,7 +130,7 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_v16i32:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_v16i32:
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32
 | 
			
		||||
@@ -172,7 +172,7 @@ define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> ad
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_i32_add_inline_constant:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
 | 
			
		||||
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[RESULT]],
 | 
			
		||||
@@ -187,7 +187,7 @@ define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_i32_add_inline_constant_inv:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
 | 
			
		||||
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[RESULT]],
 | 
			
		||||
@@ -202,7 +202,7 @@ define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out,
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_i32_add_literal:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
 | 
			
		||||
; SI: V_MOV_B32_e32 [[LIT:v[0-9]+]], 0x1869f
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
 | 
			
		||||
@@ -216,7 +216,7 @@ define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspa
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_i32_add_var:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
 | 
			
		||||
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
 | 
			
		||||
; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
 | 
			
		||||
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
 | 
			
		||||
@@ -232,7 +232,7 @@ define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_i32_add_var_inv:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
 | 
			
		||||
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
 | 
			
		||||
; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
 | 
			
		||||
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
 | 
			
		||||
@@ -248,7 +248,7 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_i32_add_vvar_inv
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
 | 
			
		||||
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
 | 
			
		||||
; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:0x10
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
 | 
			
		||||
@@ -269,7 +269,7 @@ define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrsp
 | 
			
		||||
; FIXME: We currently disallow SALU instructions in all branches,
 | 
			
		||||
; but there are some cases when they should be allowed.
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @ctpop_i32_in_br
 | 
			
		||||
; FUNC-LABEL: {{^}}ctpop_i32_in_br:
 | 
			
		||||
; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
 | 
			
		||||
; SI: S_BCNT1_I32_B32  [[SRESULT:s[0-9]+]], [[VAL]]
 | 
			
		||||
; SI: V_MOV_B32_e32 [[RESULT]], [[SRESULT]]
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@ declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) nounwind readnone
 | 
			
		||||
declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>) nounwind readnone
 | 
			
		||||
declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @s_ctpop_i64:
 | 
			
		||||
; FUNC-LABEL: {{^}}s_ctpop_i64:
 | 
			
		||||
; SI: S_LOAD_DWORDX2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 | 
			
		||||
; SI: S_BCNT1_I32_B64 [[SRESULT:s[0-9]+]], [[SVAL]]
 | 
			
		||||
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
 | 
			
		||||
@@ -19,7 +19,7 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_i64:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_i64:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
 | 
			
		||||
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
 | 
			
		||||
; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
 | 
			
		||||
@@ -34,7 +34,7 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @s_ctpop_v2i64:
 | 
			
		||||
; FUNC-LABEL: {{^}}s_ctpop_v2i64:
 | 
			
		||||
; SI: S_BCNT1_I32_B64
 | 
			
		||||
; SI: S_BCNT1_I32_B64
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -45,7 +45,7 @@ define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val)
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @s_ctpop_v4i64:
 | 
			
		||||
; FUNC-LABEL: {{^}}s_ctpop_v4i64:
 | 
			
		||||
; SI: S_BCNT1_I32_B64
 | 
			
		||||
; SI: S_BCNT1_I32_B64
 | 
			
		||||
; SI: S_BCNT1_I32_B64
 | 
			
		||||
@@ -58,7 +58,7 @@ define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val)
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_v2i64:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_v2i64:
 | 
			
		||||
; SI: V_BCNT_U32_B32
 | 
			
		||||
; SI: V_BCNT_U32_B32
 | 
			
		||||
; SI: V_BCNT_U32_B32
 | 
			
		||||
@@ -72,7 +72,7 @@ define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrs
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_ctpop_v4i64:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_ctpop_v4i64:
 | 
			
		||||
; SI: V_BCNT_U32_B32
 | 
			
		||||
; SI: V_BCNT_U32_B32
 | 
			
		||||
; SI: V_BCNT_U32_B32
 | 
			
		||||
@@ -93,7 +93,7 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs
 | 
			
		||||
; FIXME: We currently disallow SALU instructions in all branches,
 | 
			
		||||
; but there are some cases when they should be allowed.
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @ctpop_i64_in_br
 | 
			
		||||
; FUNC-LABEL: {{^}}ctpop_i64_in_br:
 | 
			
		||||
; SI: S_LOAD_DWORDX2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
 | 
			
		||||
; SI: S_BCNT1_I32_B64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
 | 
			
		||||
; SI: V_MOV_B32_e32 v[[VLO:[0-9]+]], [[RESULT]]
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@ declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
 | 
			
		||||
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
 | 
			
		||||
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @s_cttz_zero_undef_i32:
 | 
			
		||||
; FUNC-LABEL: {{^}}s_cttz_zero_undef_i32:
 | 
			
		||||
; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
 | 
			
		||||
; SI: S_FF1_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
 | 
			
		||||
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
 | 
			
		||||
@@ -19,7 +19,7 @@ define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_cttz_zero_undef_i32:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_cttz_zero_undef_i32:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
 | 
			
		||||
; SI: V_FFBL_B32_e32 [[RESULT:v[0-9]+]], [[VAL]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[RESULT]],
 | 
			
		||||
@@ -33,7 +33,7 @@ define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_cttz_zero_undef_v2i32:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_cttz_zero_undef_v2i32:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORDX2
 | 
			
		||||
; SI: V_FFBL_B32_e32
 | 
			
		||||
; SI: V_FFBL_B32_e32
 | 
			
		||||
@@ -49,7 +49,7 @@ define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_cttz_zero_undef_v4i32:
 | 
			
		||||
; FUNC-LABEL: {{^}}v_cttz_zero_undef_v4i32:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORDX4
 | 
			
		||||
; SI: V_FFBL_B32_e32
 | 
			
		||||
; SI: V_FFBL_B32_e32
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @load_i8_to_f32:
 | 
			
		||||
; SI-LABEL: {{^}}load_i8_to_f32:
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE [[LOADREG:v[0-9]+]],
 | 
			
		||||
; SI-NOT: BFE
 | 
			
		||||
; SI-NOT: LSHR
 | 
			
		||||
@@ -13,7 +13,7 @@ define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* n
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @load_v2i8_to_v2f32:
 | 
			
		||||
; SI-LABEL: {{^}}load_v2i8_to_v2f32:
 | 
			
		||||
; SI: BUFFER_LOAD_USHORT [[LOADREG:v[0-9]+]],
 | 
			
		||||
; SI-NOT: BFE
 | 
			
		||||
; SI-NOT: LSHR
 | 
			
		||||
@@ -28,7 +28,7 @@ define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @load_v3i8_to_v3f32:
 | 
			
		||||
; SI-LABEL: {{^}}load_v3i8_to_v3f32:
 | 
			
		||||
; SI-NOT: BFE
 | 
			
		||||
; SI-NOT: V_CVT_F32_UBYTE3_e32
 | 
			
		||||
; SI-DAG: V_CVT_F32_UBYTE2_e32
 | 
			
		||||
@@ -42,7 +42,7 @@ define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @load_v4i8_to_v4f32:
 | 
			
		||||
; SI-LABEL: {{^}}load_v4i8_to_v4f32:
 | 
			
		||||
; We can't use BUFFER_LOAD_DWORD here, because the load is byte aligned, and
 | 
			
		||||
; BUFFER_LOAD_DWORD requires dword alignment.
 | 
			
		||||
; SI: BUFFER_LOAD_USHORT
 | 
			
		||||
@@ -66,7 +66,7 @@ define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8>
 | 
			
		||||
; for each component, but computeKnownBits doesn't handle vectors very
 | 
			
		||||
; well.
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @load_v4i8_to_v4f32_2_uses:
 | 
			
		||||
; SI-LABEL: {{^}}load_v4i8_to_v4f32_2_uses:
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI: BUFFER_LOAD_UBYTE
 | 
			
		||||
@@ -93,7 +93,7 @@ define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; Make sure this doesn't crash.
 | 
			
		||||
; SI-LABEL: @load_v7i8_to_v7f32:
 | 
			
		||||
; SI-LABEL: {{^}}load_v7i8_to_v7f32:
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
 | 
			
		||||
  %load = load <7 x i8> addrspace(1)* %in, align 1
 | 
			
		||||
@@ -102,7 +102,7 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @load_v8i8_to_v8f32:
 | 
			
		||||
; SI-LABEL: {{^}}load_v8i8_to_v8f32:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
 | 
			
		||||
; SI-NOT: BFE
 | 
			
		||||
; SI-NOT: LSHR
 | 
			
		||||
@@ -131,7 +131,7 @@ define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @i8_zext_inreg_i32_to_f32:
 | 
			
		||||
; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[LOADREG:v[0-9]+]],
 | 
			
		||||
; SI: V_ADD_I32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
 | 
			
		||||
; SI-NEXT: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[ADD]]
 | 
			
		||||
@@ -145,7 +145,7 @@ define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addr
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @i8_zext_inreg_hi1_to_f32:
 | 
			
		||||
; SI-LABEL: {{^}}i8_zext_inreg_hi1_to_f32:
 | 
			
		||||
define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
 | 
			
		||||
  %load = load i32 addrspace(1)* %in, align 4
 | 
			
		||||
  %inreg = and i32 %load, 65280
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,7 @@
 | 
			
		||||
; ISD::UINT_TO_FP and ISD::SINT_TO_FP opcodes.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; CHECK: @sint
 | 
			
		||||
; CHECK: {{^}}sint:
 | 
			
		||||
; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
 | 
			
		||||
define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
 | 
			
		||||
@@ -21,7 +21,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
;CHECK: @uint
 | 
			
		||||
;CHECK: {{^}}uint:
 | 
			
		||||
;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
 | 
			
		||||
define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_kernel
 | 
			
		||||
; FUNC-LABEL: {{^}}test_kernel:
 | 
			
		||||
 | 
			
		||||
; DEFAULT: FloatMode: 192
 | 
			
		||||
; DEFAULT: IeeeMode: 0
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@
 | 
			
		||||
; result.  This tests that there are no instructions between the PRED_SET*
 | 
			
		||||
; and the PREDICATE_BREAK in this loop.
 | 
			
		||||
 | 
			
		||||
; CHECK: @loop_ge
 | 
			
		||||
; CHECK: {{^}}loop_ge:
 | 
			
		||||
; CHECK: LOOP_START_DX10
 | 
			
		||||
; CHECK: ALU_PUSH_BEFORE
 | 
			
		||||
; CHECK-NEXT: JUMP
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@
 | 
			
		||||
 | 
			
		||||
; Exactly one constant vector can be folded into dot4, which means exactly
 | 
			
		||||
; 4 MOV instructions
 | 
			
		||||
; CHECK: @main
 | 
			
		||||
; CHECK: {{^}}main:
 | 
			
		||||
; CHECK: MOV
 | 
			
		||||
; CHECK: MOV
 | 
			
		||||
; CHECK: MOV
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@ declare i32 @llvm.r600.read.tidig.x() #0
 | 
			
		||||
declare void @llvm.AMDGPU.barrier.local() #1
 | 
			
		||||
 | 
			
		||||
; Function Attrs: nounwind
 | 
			
		||||
; CHECK-LABEL: @signed_ds_offset_addressing_loop
 | 
			
		||||
; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop:
 | 
			
		||||
; CHECK: BB0_1:
 | 
			
		||||
; CHECK: V_ADD_I32_e32 [[VADDR:v[0-9]+]],
 | 
			
		||||
; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x0
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @anyext_load_i8:
 | 
			
		||||
; FUNC-LABEL: {{^}}anyext_load_i8:
 | 
			
		||||
; EG: AND_INT
 | 
			
		||||
; EG: 255
 | 
			
		||||
define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
 | 
			
		||||
@@ -13,7 +13,7 @@ define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspac
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @anyext_load_i16:
 | 
			
		||||
; FUNC-LABEL: {{^}}anyext_load_i16:
 | 
			
		||||
; EG: AND_INT
 | 
			
		||||
; EG: AND_INT
 | 
			
		||||
; EG-DAG: 65535
 | 
			
		||||
@@ -27,7 +27,7 @@ define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrs
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @anyext_load_lds_i8:
 | 
			
		||||
; FUNC-LABEL: {{^}}anyext_load_lds_i8:
 | 
			
		||||
; EG: AND_INT
 | 
			
		||||
; EG: 255
 | 
			
		||||
define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
 | 
			
		||||
@@ -39,7 +39,7 @@ define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addr
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @anyext_load_lds_i16:
 | 
			
		||||
; FUNC-LABEL: {{^}}anyext_load_lds_i16:
 | 
			
		||||
; EG: AND_INT
 | 
			
		||||
; EG: AND_INT
 | 
			
		||||
; EG-DAG: 65535
 | 
			
		||||
@@ -53,7 +53,7 @@ define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 a
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @sextload_global_i8_to_i64
 | 
			
		||||
; FUNC-LABEL: {{^}}sextload_global_i8_to_i64:
 | 
			
		||||
; SI: BUFFER_LOAD_SBYTE [[LOAD:v[0-9]+]],
 | 
			
		||||
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
@@ -64,7 +64,7 @@ define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)*
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @sextload_global_i16_to_i64
 | 
			
		||||
; FUNC-LABEL: {{^}}sextload_global_i16_to_i64:
 | 
			
		||||
; SI: BUFFER_LOAD_SSHORT [[LOAD:v[0-9]+]],
 | 
			
		||||
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
@@ -75,7 +75,7 @@ define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @sextload_global_i32_to_i64
 | 
			
		||||
; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
 | 
			
		||||
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
@@ -86,7 +86,7 @@ define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @zextload_global_i8_to_i64
 | 
			
		||||
; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:
 | 
			
		||||
; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
 | 
			
		||||
; SI-DAG: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
 | 
			
		||||
; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
 | 
			
		||||
@@ -98,7 +98,7 @@ define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)*
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @zextload_global_i16_to_i64
 | 
			
		||||
; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
 | 
			
		||||
; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
 | 
			
		||||
; SI-DAG: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
 | 
			
		||||
; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
 | 
			
		||||
@@ -110,7 +110,7 @@ define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @zextload_global_i32_to_i64
 | 
			
		||||
; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
 | 
			
		||||
; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
 | 
			
		||||
; SI-DAG: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
 | 
			
		||||
; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @extract_vector_elt_v2i16
 | 
			
		||||
; FUNC-LABEL: {{^}}extract_vector_elt_v2i16:
 | 
			
		||||
; SI: BUFFER_LOAD_USHORT
 | 
			
		||||
; SI: BUFFER_LOAD_USHORT
 | 
			
		||||
; SI: BUFFER_STORE_SHORT
 | 
			
		||||
@@ -14,7 +14,7 @@ define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) no
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @extract_vector_elt_v4i16
 | 
			
		||||
; FUNC-LABEL: {{^}}extract_vector_elt_v4i16:
 | 
			
		||||
; SI: BUFFER_LOAD_USHORT
 | 
			
		||||
; SI: BUFFER_LOAD_USHORT
 | 
			
		||||
; SI: BUFFER_STORE_SHORT
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,7 @@ declare double @llvm.fabs.f64(double) readnone
 | 
			
		||||
declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
 | 
			
		||||
declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_fabs_f64
 | 
			
		||||
; FUNC-LABEL: {{^}}v_fabs_f64:
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
define void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
 | 
			
		||||
@@ -20,7 +20,7 @@ define void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fabs_f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fabs_f64:
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
; SI-NOT: V_AND_B32
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -30,7 +30,7 @@ define void @fabs_f64(double addrspace(1)* %out, double %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fabs_v2f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fabs_v2f64:
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -40,7 +40,7 @@ define void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fabs_v4f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fabs_v4f64:
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
@@ -52,7 +52,7 @@ define void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @fabs_fold_f64
 | 
			
		||||
; SI-LABEL: {{^}}fabs_fold_f64:
 | 
			
		||||
; SI: S_LOAD_DWORDX2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
 | 
			
		||||
; SI-NOT: AND
 | 
			
		||||
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
 | 
			
		||||
@@ -64,7 +64,7 @@ define void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1)
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @fabs_fn_fold_f64
 | 
			
		||||
; SI-LABEL: {{^}}fabs_fn_fold_f64:
 | 
			
		||||
; SI: S_LOAD_DWORDX2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
 | 
			
		||||
; SI-NOT: AND
 | 
			
		||||
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
 | 
			
		||||
@@ -76,7 +76,7 @@ define void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fabs_free_f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fabs_free_f64:
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
define void @fabs_free_f64(double addrspace(1)* %out, i64 %in) {
 | 
			
		||||
@@ -86,7 +86,7 @@ define void @fabs_free_f64(double addrspace(1)* %out, i64 %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fabs_fn_free_f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fabs_fn_free_f64:
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
define void @fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
 | 
			
		||||
; unless isFabsFree returns true
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fabs_fn_free
 | 
			
		||||
; FUNC-LABEL: {{^}}fabs_fn_free:
 | 
			
		||||
; R600-NOT: AND
 | 
			
		||||
; R600: |PV.{{[XYZW]}}|
 | 
			
		||||
 | 
			
		||||
@@ -19,7 +19,7 @@ define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fabs_free
 | 
			
		||||
; FUNC-LABEL: {{^}}fabs_free:
 | 
			
		||||
; R600-NOT: AND
 | 
			
		||||
; R600: |PV.{{[XYZW]}}|
 | 
			
		||||
 | 
			
		||||
@@ -32,7 +32,7 @@ define void @fabs_free(float addrspace(1)* %out, i32 %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fabs_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fabs_f32:
 | 
			
		||||
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 | 
			
		||||
 | 
			
		||||
; SI: V_AND_B32
 | 
			
		||||
@@ -42,7 +42,7 @@ define void @fabs_f32(float addrspace(1)* %out, float %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fabs_v2f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fabs_v2f32:
 | 
			
		||||
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 | 
			
		||||
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 | 
			
		||||
 | 
			
		||||
@@ -54,7 +54,7 @@ define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fabs_v4
 | 
			
		||||
; FUNC-LABEL: {{^}}fabs_v4f32:
 | 
			
		||||
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 | 
			
		||||
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 | 
			
		||||
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 | 
			
		||||
@@ -70,7 +70,7 @@ define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @fabs_fn_fold
 | 
			
		||||
; SI-LABEL: {{^}}fabs_fn_fold:
 | 
			
		||||
; SI: S_LOAD_DWORD [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
 | 
			
		||||
; SI-NOT: AND
 | 
			
		||||
; SI: V_MUL_F32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
 | 
			
		||||
@@ -81,7 +81,7 @@ define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @fabs_fold
 | 
			
		||||
; SI-LABEL: {{^}}fabs_fold:
 | 
			
		||||
; SI: S_LOAD_DWORD [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
 | 
			
		||||
; SI-NOT: AND
 | 
			
		||||
; SI: V_MUL_F32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fadd_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fadd_f32:
 | 
			
		||||
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
 | 
			
		||||
; SI: V_ADD_F32
 | 
			
		||||
define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
 | 
			
		||||
@@ -10,7 +10,7 @@ define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fadd_v2f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fadd_v2f32:
 | 
			
		||||
; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
 | 
			
		||||
; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
 | 
			
		||||
; SI: V_ADD_F32
 | 
			
		||||
@@ -21,7 +21,7 @@ define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x flo
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fadd_v4f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fadd_v4f32:
 | 
			
		||||
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
@@ -39,7 +39,7 @@ define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fadd_v8f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fadd_v8f32:
 | 
			
		||||
; R600: ADD
 | 
			
		||||
; R600: ADD
 | 
			
		||||
; R600: ADD
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; CHECK: @fadd_f64
 | 
			
		||||
; CHECK: {{^}}fadd_f64:
 | 
			
		||||
; CHECK: V_ADD_F64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}
 | 
			
		||||
 | 
			
		||||
define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,7 @@ declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
 | 
			
		||||
declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone
 | 
			
		||||
declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fceil_f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}fceil_f32:
 | 
			
		||||
; SI: V_CEIL_F32_e32
 | 
			
		||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
 | 
			
		||||
; EG: CEIL {{\*? *}}[[RESULT]]
 | 
			
		||||
@@ -18,7 +18,7 @@ define void @fceil_f32(float addrspace(1)* %out, float %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fceil_v2f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}fceil_v2f32:
 | 
			
		||||
; SI: V_CEIL_F32_e32
 | 
			
		||||
; SI: V_CEIL_F32_e32
 | 
			
		||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
 | 
			
		||||
@@ -30,7 +30,7 @@ define void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fceil_v3f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}fceil_v3f32:
 | 
			
		||||
; FIXME-SI: V_CEIL_F32_e32
 | 
			
		||||
; FIXME-SI: V_CEIL_F32_e32
 | 
			
		||||
; FIXME-SI: V_CEIL_F32_e32
 | 
			
		||||
@@ -46,7 +46,7 @@ define void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fceil_v4f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}fceil_v4f32:
 | 
			
		||||
; SI: V_CEIL_F32_e32
 | 
			
		||||
; SI: V_CEIL_F32_e32
 | 
			
		||||
; SI: V_CEIL_F32_e32
 | 
			
		||||
@@ -62,7 +62,7 @@ define void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fceil_v8f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}fceil_v8f32:
 | 
			
		||||
; SI: V_CEIL_F32_e32
 | 
			
		||||
; SI: V_CEIL_F32_e32
 | 
			
		||||
; SI: V_CEIL_F32_e32
 | 
			
		||||
@@ -87,7 +87,7 @@ define void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fceil_v16f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}fceil_v16f32:
 | 
			
		||||
; SI: V_CEIL_F32_e32
 | 
			
		||||
; SI: V_CEIL_F32_e32
 | 
			
		||||
; SI: V_CEIL_F32_e32
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,7 @@ declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
 | 
			
		||||
declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
 | 
			
		||||
declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fceil_f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}fceil_f64:
 | 
			
		||||
; CI: V_CEIL_F64_e32
 | 
			
		||||
; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
 | 
			
		||||
; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
 | 
			
		||||
@@ -34,7 +34,7 @@ define void @fceil_f64(double addrspace(1)* %out, double %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fceil_v2f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}fceil_v2f64:
 | 
			
		||||
; CI: V_CEIL_F64_e32
 | 
			
		||||
; CI: V_CEIL_F64_e32
 | 
			
		||||
define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
 | 
			
		||||
@@ -43,7 +43,7 @@ define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FIXME-FUNC-LABEL: @fceil_v3f64:
 | 
			
		||||
; FIXME-FUNC-LABEL: {{^}}fceil_v3f64:
 | 
			
		||||
; FIXME-CI: V_CEIL_F64_e32
 | 
			
		||||
; FIXME-CI: V_CEIL_F64_e32
 | 
			
		||||
; FIXME-CI: V_CEIL_F64_e32
 | 
			
		||||
@@ -53,7 +53,7 @@ define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
 | 
			
		||||
;   ret void
 | 
			
		||||
; }
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fceil_v4f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}fceil_v4f64:
 | 
			
		||||
; CI: V_CEIL_F64_e32
 | 
			
		||||
; CI: V_CEIL_F64_e32
 | 
			
		||||
; CI: V_CEIL_F64_e32
 | 
			
		||||
@@ -64,7 +64,7 @@ define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fceil_v8f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}fceil_v8f64:
 | 
			
		||||
; CI: V_CEIL_F64_e32
 | 
			
		||||
; CI: V_CEIL_F64_e32
 | 
			
		||||
; CI: V_CEIL_F64_e32
 | 
			
		||||
@@ -79,7 +79,7 @@ define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fceil_v16f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}fceil_v16f64:
 | 
			
		||||
; CI: V_CEIL_F64_e32
 | 
			
		||||
; CI: V_CEIL_F64_e32
 | 
			
		||||
; CI: V_CEIL_F64_e32
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; CHECK: @fcmp_sext
 | 
			
		||||
; CHECK: {{^}}fcmp_sext:
 | 
			
		||||
; CHECK: SETE_DX10  T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
 | 
			
		||||
define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
 | 
			
		||||
@@ -18,7 +18,7 @@ entry:
 | 
			
		||||
; SET*_DX10 instruction.  Previously we were lowering this to:
 | 
			
		||||
; SET* + FP_TO_SINT
 | 
			
		||||
 | 
			
		||||
; CHECK: @fcmp_br
 | 
			
		||||
; CHECK: {{^}}fcmp_br:
 | 
			
		||||
; CHECK: SET{{[N]*}}E_DX10 * T{{[0-9]+\.[XYZW],}}
 | 
			
		||||
; CHECK-NEXT {{[0-9]+(5.0}}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; CHECK: @flt_f64
 | 
			
		||||
; CHECK: {{^}}flt_f64:
 | 
			
		||||
; CHECK: V_CMP_LT_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 | 
			
		||||
 | 
			
		||||
define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
@@ -13,7 +13,7 @@ define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK: @fle_f64
 | 
			
		||||
; CHECK: {{^}}fle_f64:
 | 
			
		||||
; CHECK: V_CMP_LE_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 | 
			
		||||
 | 
			
		||||
define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
@@ -26,7 +26,7 @@ define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK: @fgt_f64
 | 
			
		||||
; CHECK: {{^}}fgt_f64:
 | 
			
		||||
; CHECK: V_CMP_GT_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 | 
			
		||||
 | 
			
		||||
define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
@@ -39,7 +39,7 @@ define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK: @fge_f64
 | 
			
		||||
; CHECK: {{^}}fge_f64:
 | 
			
		||||
; CHECK: V_CMP_GE_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 | 
			
		||||
 | 
			
		||||
define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
@@ -52,7 +52,7 @@ define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK: @fne_f64
 | 
			
		||||
; CHECK: {{^}}fne_f64:
 | 
			
		||||
; CHECK: V_CMP_NEQ_F64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 | 
			
		||||
 | 
			
		||||
define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
@@ -65,7 +65,7 @@ define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK: @feq_f64
 | 
			
		||||
; CHECK: {{^}}feq_f64:
 | 
			
		||||
; CHECK: V_CMP_EQ_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 | 
			
		||||
 | 
			
		||||
define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; CHECK: @fconst_f64
 | 
			
		||||
; CHECK: {{^}}fconst_f64:
 | 
			
		||||
; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0x40140000
 | 
			
		||||
; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,7 @@ declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind read
 | 
			
		||||
declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; Try to identify arg based on higher address.
 | 
			
		||||
; FUNC-LABEL: @test_copysign_f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copysign_f32:
 | 
			
		||||
; SI: S_LOAD_DWORD [[SMAG:s[0-9]+]], {{.*}} 0xb
 | 
			
		||||
; SI: S_LOAD_DWORD [[SSIGN:s[0-9]+]], {{.*}} 0xc
 | 
			
		||||
; SI-DAG: V_MOV_B32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
 | 
			
		||||
@@ -24,7 +24,7 @@ define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_copysign_v2f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copysign_v2f32:
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
 | 
			
		||||
; EG: BFI_INT
 | 
			
		||||
@@ -35,7 +35,7 @@ define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %ma
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_copysign_v4f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copysign_v4f32:
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
 | 
			
		||||
; EG: BFI_INT
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@ declare double @llvm.copysign.f64(double, double) nounwind readnone
 | 
			
		||||
declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone
 | 
			
		||||
declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_copysign_f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copysign_f64:
 | 
			
		||||
; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
 | 
			
		||||
; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
 | 
			
		||||
; SI-DAG: V_MOV_B32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
 | 
			
		||||
@@ -20,7 +20,7 @@ define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %s
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_copysign_v2f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copysign_v2f64:
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind {
 | 
			
		||||
  %result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
 | 
			
		||||
@@ -28,7 +28,7 @@ define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_copysign_v4f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}test_copysign_v4f64:
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind {
 | 
			
		||||
  %result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@
 | 
			
		||||
; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
 | 
			
		||||
; instruction groups.
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fdiv_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fdiv_f32:
 | 
			
		||||
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
 | 
			
		||||
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
 | 
			
		||||
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
 | 
			
		||||
@@ -22,7 +22,7 @@ entry:
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fdiv_v2f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fdiv_v2f32:
 | 
			
		||||
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
 | 
			
		||||
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
 | 
			
		||||
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
 | 
			
		||||
@@ -39,7 +39,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fdiv_v4f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fdiv_v4f32:
 | 
			
		||||
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; CHECK: @fdiv_f64
 | 
			
		||||
; CHECK: {{^}}fdiv_f64:
 | 
			
		||||
; CHECK: V_RCP_F64_e32 {{v\[[0-9]+:[0-9]+\]}}
 | 
			
		||||
; CHECK: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -3,7 +3,7 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=rv670 | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; R600 supports 8 fetches in a clause
 | 
			
		||||
; CHECK: @fetch_limits_r600
 | 
			
		||||
; CHECK: {{^}}fetch_limits_r600:
 | 
			
		||||
; CHECK: Fetch clause
 | 
			
		||||
; CHECK: Fetch clause
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -12,7 +12,7 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; r700+ supports 16 fetches in a clause
 | 
			
		||||
; CHECK: @fetch_limits_r700
 | 
			
		||||
; CHECK: {{^}}fetch_limits_r700:
 | 
			
		||||
; CHECK: Fetch clause
 | 
			
		||||
; CHECK: Fetch clause
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,7 @@ declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
 | 
			
		||||
declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
 | 
			
		||||
declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @ffloor_f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}ffloor_f64:
 | 
			
		||||
; CI: V_FLOOR_F64_e32
 | 
			
		||||
 | 
			
		||||
; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
 | 
			
		||||
@@ -35,7 +35,7 @@ define void @ffloor_f64(double addrspace(1)* %out, double %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @ffloor_v2f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}ffloor_v2f64:
 | 
			
		||||
; CI: V_FLOOR_F64_e32
 | 
			
		||||
; CI: V_FLOOR_F64_e32
 | 
			
		||||
define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
 | 
			
		||||
@@ -44,7 +44,7 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FIXME-FUNC-LABEL: @ffloor_v3f64:
 | 
			
		||||
; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64:
 | 
			
		||||
; FIXME-CI: V_FLOOR_F64_e32
 | 
			
		||||
; FIXME-CI: V_FLOOR_F64_e32
 | 
			
		||||
; FIXME-CI: V_FLOOR_F64_e32
 | 
			
		||||
@@ -54,7 +54,7 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
 | 
			
		||||
;   ret void
 | 
			
		||||
; }
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @ffloor_v4f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}ffloor_v4f64:
 | 
			
		||||
; CI: V_FLOOR_F64_e32
 | 
			
		||||
; CI: V_FLOOR_F64_e32
 | 
			
		||||
; CI: V_FLOOR_F64_e32
 | 
			
		||||
@@ -65,7 +65,7 @@ define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @ffloor_v8f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}ffloor_v8f64:
 | 
			
		||||
; CI: V_FLOOR_F64_e32
 | 
			
		||||
; CI: V_FLOOR_F64_e32
 | 
			
		||||
; CI: V_FLOOR_F64_e32
 | 
			
		||||
@@ -80,7 +80,7 @@ define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @ffloor_v16f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}ffloor_v16f64:
 | 
			
		||||
; CI: V_FLOOR_F64_e32
 | 
			
		||||
; CI: V_FLOOR_F64_e32
 | 
			
		||||
; CI: V_FLOOR_F64_e32
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@
 | 
			
		||||
; specialize away generic pointer accesses.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @branch_use_flat_i32:
 | 
			
		||||
; CHECK-LABEL: {{^}}branch_use_flat_i32:
 | 
			
		||||
; CHECK: FLAT_STORE_DWORD {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH]
 | 
			
		||||
; CHECK: S_ENDPGM
 | 
			
		||||
define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
 | 
			
		||||
@@ -34,7 +34,7 @@ end:
 | 
			
		||||
; These testcases might become useless when there are optimizations to
 | 
			
		||||
; remove generic pointers.
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_flat_i32:
 | 
			
		||||
; CHECK-LABEL: {{^}}store_flat_i32:
 | 
			
		||||
; CHECK: V_MOV_B32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}}
 | 
			
		||||
; CHECK: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}}
 | 
			
		||||
; CHECK: V_MOV_B32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}}
 | 
			
		||||
@@ -45,7 +45,7 @@ define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_flat_i64:
 | 
			
		||||
; CHECK-LABEL: {{^}}store_flat_i64:
 | 
			
		||||
; CHECK: FLAT_STORE_DWORDX2
 | 
			
		||||
define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
 | 
			
		||||
  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
 | 
			
		||||
@@ -53,7 +53,7 @@ define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_flat_v4i32:
 | 
			
		||||
; CHECK-LABEL: {{^}}store_flat_v4i32:
 | 
			
		||||
; CHECK: FLAT_STORE_DWORDX4
 | 
			
		||||
define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
 | 
			
		||||
  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
 | 
			
		||||
@@ -61,7 +61,7 @@ define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_flat_trunc_i16:
 | 
			
		||||
; CHECK-LABEL: {{^}}store_flat_trunc_i16:
 | 
			
		||||
; CHECK: FLAT_STORE_SHORT
 | 
			
		||||
define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
 | 
			
		||||
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
 | 
			
		||||
@@ -70,7 +70,7 @@ define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_flat_trunc_i8:
 | 
			
		||||
; CHECK-LABEL: {{^}}store_flat_trunc_i8:
 | 
			
		||||
; CHECK: FLAT_STORE_BYTE
 | 
			
		||||
define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
 | 
			
		||||
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
 | 
			
		||||
@@ -154,7 +154,7 @@ define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)*
 | 
			
		||||
; scratch allocations again.
 | 
			
		||||
 | 
			
		||||
; Check for prologue initializing special SGPRs pointing to scratch.
 | 
			
		||||
; CHECK-LABEL: @store_flat_scratch:
 | 
			
		||||
; CHECK-LABEL: {{^}}store_flat_scratch:
 | 
			
		||||
; CHECK: S_MOVK_I32 flat_scratch_lo, 0
 | 
			
		||||
; CHECK-NO-PROMOTE: S_MOVK_I32 flat_scratch_hi, 40
 | 
			
		||||
; CHECK-PROMOTE: S_MOVK_I32 flat_scratch_hi, 0
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@ declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) n
 | 
			
		||||
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fma_f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fma_f64:
 | 
			
		||||
; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 | 
			
		||||
define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
 | 
			
		||||
@@ -17,7 +17,7 @@ define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fma_v2f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fma_v2f64:
 | 
			
		||||
; SI: V_FMA_F64
 | 
			
		||||
; SI: V_FMA_F64
 | 
			
		||||
define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
 | 
			
		||||
@@ -30,7 +30,7 @@ define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fma_v4f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fma_v4f64:
 | 
			
		||||
; SI: V_FMA_F64
 | 
			
		||||
; SI: V_FMA_F64
 | 
			
		||||
; SI: V_FMA_F64
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@ declare float @llvm.fma.f32(float, float, float) nounwind readnone
 | 
			
		||||
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
 | 
			
		||||
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fma_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fma_f32:
 | 
			
		||||
; SI: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
 | 
			
		||||
define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
 | 
			
		||||
                     float addrspace(1)* %in2, float addrspace(1)* %in3) {
 | 
			
		||||
@@ -17,7 +17,7 @@ define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fma_v2f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fma_v2f32:
 | 
			
		||||
; SI: V_FMA_F32
 | 
			
		||||
; SI: V_FMA_F32
 | 
			
		||||
define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
 | 
			
		||||
@@ -30,7 +30,7 @@ define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)*
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fma_v4f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fma_v4f32:
 | 
			
		||||
; SI: V_FMA_F32
 | 
			
		||||
; SI: V_FMA_F32
 | 
			
		||||
; SI: V_FMA_F32
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fmul_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fmul_f32:
 | 
			
		||||
; R600: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
 | 
			
		||||
 | 
			
		||||
; SI: V_MUL_F32
 | 
			
		||||
@@ -17,7 +17,7 @@ declare float @llvm.R600.load.input(i32) readnone
 | 
			
		||||
 | 
			
		||||
declare void @llvm.AMDGPU.store.output(float, i32)
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fmul_v2f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fmul_v2f32:
 | 
			
		||||
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
 | 
			
		||||
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
 | 
			
		||||
 | 
			
		||||
@@ -30,7 +30,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fmul_v4f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fmul_v4f32:
 | 
			
		||||
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
@@ -49,7 +49,7 @@ define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_mul_2_k
 | 
			
		||||
; FUNC-LABEL: {{^}}test_mul_2_k:
 | 
			
		||||
; SI: V_MUL_F32
 | 
			
		||||
; SI-NOT: V_MUL_F32
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -60,7 +60,7 @@ define void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_mul_2_k_inv
 | 
			
		||||
; FUNC-LABEL: {{^}}test_mul_2_k_inv:
 | 
			
		||||
; SI: V_MUL_F32
 | 
			
		||||
; SI-NOT: V_MUL_F32
 | 
			
		||||
; SI-NOT: V_MAD_F32
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fmul_f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fmul_f64:
 | 
			
		||||
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 | 
			
		||||
define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
                      double addrspace(1)* %in2) {
 | 
			
		||||
@@ -11,7 +11,7 @@ define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fmul_v2f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fmul_v2f64:
 | 
			
		||||
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 | 
			
		||||
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 | 
			
		||||
define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
 | 
			
		||||
@@ -23,7 +23,7 @@ define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fmul_v4f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fmul_v4f64:
 | 
			
		||||
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 | 
			
		||||
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 | 
			
		||||
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,7 @@ declare double @llvm.fmuladd.f64(double, double, double)
 | 
			
		||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 | 
			
		||||
declare float @llvm.fabs.f32(float) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @fmuladd_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}fmuladd_f32:
 | 
			
		||||
; CHECK: V_MAD_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
 | 
			
		||||
 | 
			
		||||
define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
 | 
			
		||||
@@ -18,7 +18,7 @@ define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @fmuladd_f64
 | 
			
		||||
; CHECK-LABEL: {{^}}fmuladd_f64:
 | 
			
		||||
; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 | 
			
		||||
 | 
			
		||||
define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
@@ -31,7 +31,7 @@ define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @fmuladd_2.0_a_b_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
 | 
			
		||||
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
 | 
			
		||||
@@ -50,7 +50,7 @@ define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @fmuladd_a_2.0_b_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
 | 
			
		||||
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
 | 
			
		||||
@@ -69,7 +69,7 @@ define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @fadd_a_a_b_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}fadd_a_a_b_f32:
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
 | 
			
		||||
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
 | 
			
		||||
@@ -91,7 +91,7 @@ define void @fadd_a_a_b_f32(float addrspace(1)* %out,
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @fadd_b_a_a_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}fadd_b_a_a_f32:
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
 | 
			
		||||
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
 | 
			
		||||
@@ -113,7 +113,7 @@ define void @fadd_b_a_a_f32(float addrspace(1)* %out,
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @fmuladd_neg_2.0_a_b_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
 | 
			
		||||
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
 | 
			
		||||
@@ -133,7 +133,7 @@ define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @fmuladd_neg_2.0_neg_a_b_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
 | 
			
		||||
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
 | 
			
		||||
@@ -155,7 +155,7 @@ define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspa
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @fmuladd_2.0_neg_a_b_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
 | 
			
		||||
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
 | 
			
		||||
@@ -177,7 +177,7 @@ define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @fmuladd_2.0_a_neg_b_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 | 
			
		||||
; CHECK-DAG: BUFFER_LOAD_DWORD [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
 | 
			
		||||
; CHECK: V_MAD_F32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
 | 
			
		||||
 
 | 
			
		||||
@@ -3,7 +3,7 @@
 | 
			
		||||
; FIXME: Check something here. Currently it seems fabs + fneg aren't
 | 
			
		||||
; folded into 2 modifiers, although theoretically that should work.
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_fabs_fadd_f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f64:
 | 
			
		||||
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff
 | 
			
		||||
; SI: V_AND_B32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]]
 | 
			
		||||
; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}}
 | 
			
		||||
@@ -25,7 +25,7 @@ define void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, double addrspace(1)
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_fabs_fmul_f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fabs_fmul_f64:
 | 
			
		||||
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|
 | 
			
		||||
define void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y) {
 | 
			
		||||
  %fabs = call double @llvm.fabs.f64(double %x)
 | 
			
		||||
@@ -35,7 +35,7 @@ define void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y)
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_fabs_free_f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fabs_free_f64:
 | 
			
		||||
define void @fneg_fabs_free_f64(double addrspace(1)* %out, i64 %in) {
 | 
			
		||||
  %bc = bitcast i64 %in to double
 | 
			
		||||
  %fabs = call double @llvm.fabs.f64(double %bc)
 | 
			
		||||
@@ -44,7 +44,7 @@ define void @fneg_fabs_free_f64(double addrspace(1)* %out, i64 %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_fabs_fn_free_f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fabs_fn_free_f64:
 | 
			
		||||
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
 | 
			
		||||
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
 | 
			
		||||
define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
 | 
			
		||||
@@ -55,7 +55,7 @@ define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_fabs_f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fabs_f64:
 | 
			
		||||
; SI: S_LOAD_DWORDX2
 | 
			
		||||
; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}
 | 
			
		||||
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
 | 
			
		||||
@@ -69,7 +69,7 @@ define void @fneg_fabs_f64(double addrspace(1)* %out, double %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_fabs_v2f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fabs_v2f64:
 | 
			
		||||
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
 | 
			
		||||
; SI-NOT: 0x80000000
 | 
			
		||||
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
 | 
			
		||||
@@ -81,7 +81,7 @@ define void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in)
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_fabs_v4f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fabs_v4f64:
 | 
			
		||||
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
 | 
			
		||||
; SI-NOT: 0x80000000
 | 
			
		||||
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_fabs_fadd_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
 | 
			
		||||
; SI-NOT: AND
 | 
			
		||||
; SI: V_SUB_F32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
 | 
			
		||||
define void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
 | 
			
		||||
@@ -12,7 +12,7 @@ define void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_fabs_fmul_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32:
 | 
			
		||||
; SI-NOT: AND
 | 
			
		||||
; SI: V_MUL_F32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
 | 
			
		||||
; SI-NOT: AND
 | 
			
		||||
@@ -28,7 +28,7 @@ define void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
 | 
			
		||||
; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
 | 
			
		||||
; unless isFabsFree returns true
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_fabs_free_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fabs_free_f32:
 | 
			
		||||
; R600-NOT: AND
 | 
			
		||||
; R600: |PV.{{[XYZW]}}|
 | 
			
		||||
; R600: -PV
 | 
			
		||||
@@ -43,7 +43,7 @@ define void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_fabs_fn_free_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fabs_fn_free_f32:
 | 
			
		||||
; R600-NOT: AND
 | 
			
		||||
; R600: |PV.{{[XYZW]}}|
 | 
			
		||||
; R600: -PV
 | 
			
		||||
@@ -58,7 +58,7 @@ define void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_fabs_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fabs_f32:
 | 
			
		||||
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
 | 
			
		||||
; SI: V_OR_B32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
 | 
			
		||||
define void @fneg_fabs_f32(float addrspace(1)* %out, float %in) {
 | 
			
		||||
@@ -68,7 +68,7 @@ define void @fneg_fabs_f32(float addrspace(1)* %out, float %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_fneg_fabs_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}v_fneg_fabs_f32:
 | 
			
		||||
; SI: V_OR_B32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
 | 
			
		||||
define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
 | 
			
		||||
  %val = load float addrspace(1)* %in, align 4
 | 
			
		||||
@@ -78,7 +78,7 @@ define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in)
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_fabs_v2f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fabs_v2f32:
 | 
			
		||||
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 | 
			
		||||
; R600: -PV
 | 
			
		||||
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 | 
			
		||||
@@ -97,7 +97,7 @@ define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FIXME: SGPR should be used directly for first src operand.
 | 
			
		||||
; FUNC-LABEL: @fneg_fabs_v4f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
 | 
			
		||||
; SI: V_MOV_B32_e32 [[IMMREG:v[0-9]+]], 0x80000000
 | 
			
		||||
; SI-NOT: 0x80000000
 | 
			
		||||
; SI: V_OR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_f64:
 | 
			
		||||
; SI: V_XOR_B32
 | 
			
		||||
define void @fneg_f64(double addrspace(1)* %out, double %in) {
 | 
			
		||||
  %fneg = fsub double -0.000000e+00, %in
 | 
			
		||||
@@ -8,7 +8,7 @@ define void @fneg_f64(double addrspace(1)* %out, double %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_v2f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_v2f64:
 | 
			
		||||
; SI: V_XOR_B32
 | 
			
		||||
; SI: V_XOR_B32
 | 
			
		||||
define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
 | 
			
		||||
@@ -17,7 +17,7 @@ define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_v4f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_v4f64:
 | 
			
		||||
; R600: -PV
 | 
			
		||||
; R600: -T
 | 
			
		||||
; R600: -PV
 | 
			
		||||
@@ -37,7 +37,7 @@ define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double>
 | 
			
		||||
; (fneg (f64 bitcast (i64 a))) => (f64 bitcast (xor (i64 a), 0x8000000000000000))
 | 
			
		||||
; unless the target returns true for isNegFree()
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_free_f64
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_free_f64:
 | 
			
		||||
; FIXME: Unnecessary copy to VGPRs
 | 
			
		||||
; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
 | 
			
		||||
define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
 | 
			
		||||
@@ -47,7 +47,7 @@ define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @fneg_fold
 | 
			
		||||
; SI-LABEL: {{^}}fneg_fold_f64:
 | 
			
		||||
; SI: S_LOAD_DWORDX2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
 | 
			
		||||
; SI-NOT: XOR
 | 
			
		||||
; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_f32:
 | 
			
		||||
; R600: -PV
 | 
			
		||||
 | 
			
		||||
; SI: V_XOR_B32
 | 
			
		||||
@@ -11,7 +11,7 @@ define void @fneg_f32(float addrspace(1)* %out, float %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_v2f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_v2f32:
 | 
			
		||||
; R600: -PV
 | 
			
		||||
; R600: -PV
 | 
			
		||||
 | 
			
		||||
@@ -23,7 +23,7 @@ define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %i
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_v4f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_v4f32:
 | 
			
		||||
; R600: -PV
 | 
			
		||||
; R600: -T
 | 
			
		||||
; R600: -PV
 | 
			
		||||
@@ -43,7 +43,7 @@ define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %i
 | 
			
		||||
; (fneg (f32 bitcast (i32 a))) => (f32 bitcast (xor (i32 a), 0x80000000))
 | 
			
		||||
; unless the target returns true for isNegFree()
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_free_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_free_f32:
 | 
			
		||||
; R600-NOT: XOR
 | 
			
		||||
; R600: -KC0[2].Z
 | 
			
		||||
 | 
			
		||||
@@ -56,7 +56,7 @@ define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fneg_fold
 | 
			
		||||
; FUNC-LABEL: {{^}}fneg_fold_f32:
 | 
			
		||||
; SI: S_LOAD_DWORD [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
 | 
			
		||||
; SI-NOT: XOR
 | 
			
		||||
; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
 | 
			
		||||
 
 | 
			
		||||
@@ -3,7 +3,7 @@
 | 
			
		||||
declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
 | 
			
		||||
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_convert_fp16_to_fp32:
 | 
			
		||||
; SI-LABEL: {{^}}test_convert_fp16_to_fp32:
 | 
			
		||||
; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
 | 
			
		||||
; SI: V_CVT_F32_F16_e32 [[RESULT:v[0-9]+]], [[VAL]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[RESULT]]
 | 
			
		||||
@@ -15,7 +15,7 @@ define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 add
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_convert_fp16_to_fp64:
 | 
			
		||||
; SI-LABEL: {{^}}test_convert_fp16_to_fp64:
 | 
			
		||||
; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
 | 
			
		||||
; SI: V_CVT_F32_F16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
 | 
			
		||||
; SI: V_CVT_F64_F32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@
 | 
			
		||||
 | 
			
		||||
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_convert_fp32_to_fp16:
 | 
			
		||||
; SI-LABEL: {{^}}test_convert_fp32_to_fp16:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
 | 
			
		||||
; SI: V_CVT_F16_F32_e32 [[RESULT:v[0-9]+]], [[VAL]]
 | 
			
		||||
; SI: BUFFER_STORE_SHORT [[RESULT]]
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fp_to_sint_f64_i32
 | 
			
		||||
; FUNC-LABEL: {{^}}fp_to_sint_f64_i32:
 | 
			
		||||
; SI: V_CVT_I32_F64_e32
 | 
			
		||||
define void @fp_to_sint_f64_i32(i32 addrspace(1)* %out, double %in) {
 | 
			
		||||
  %result = fptosi double %in to i32
 | 
			
		||||
@@ -8,7 +8,7 @@ define void @fp_to_sint_f64_i32(i32 addrspace(1)* %out, double %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fp_to_sint_v2f64_v2i32
 | 
			
		||||
; FUNC-LABEL: {{^}}fp_to_sint_v2f64_v2i32:
 | 
			
		||||
; SI: V_CVT_I32_F64_e32
 | 
			
		||||
; SI: V_CVT_I32_F64_e32
 | 
			
		||||
define void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out, <2 x double> %in) {
 | 
			
		||||
@@ -17,7 +17,7 @@ define void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out, <2 x double> %
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fp_to_sint_v4f64_v4i32
 | 
			
		||||
; FUNC-LABEL: {{^}}fp_to_sint_v4f64_v4i32:
 | 
			
		||||
; SI: V_CVT_I32_F64_e32
 | 
			
		||||
; SI: V_CVT_I32_F64_e32
 | 
			
		||||
; SI: V_CVT_I32_F64_e32
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fp_to_sint_i32
 | 
			
		||||
; FUNC-LABEL: {{^}}fp_to_sint_i32:
 | 
			
		||||
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 | 
			
		||||
; SI: V_CVT_I32_F32_e32
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -11,7 +11,7 @@ define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fp_to_sint_v2i32
 | 
			
		||||
; FUNC-LABEL: {{^}}fp_to_sint_v2i32:
 | 
			
		||||
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 | 
			
		||||
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 | 
			
		||||
; SI: V_CVT_I32_F32_e32
 | 
			
		||||
@@ -22,7 +22,7 @@ define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fp_to_sint_v4i32
 | 
			
		||||
; FUNC-LABEL: {{^}}fp_to_sint_v4i32:
 | 
			
		||||
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 | 
			
		||||
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW]}}
 | 
			
		||||
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 | 
			
		||||
@@ -38,7 +38,7 @@ define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fp_to_sint_i64
 | 
			
		||||
; FUNC-LABEL: {{^}}fp_to_sint_i64:
 | 
			
		||||
 | 
			
		||||
; EG-DAG: AND_INT
 | 
			
		||||
; EG-DAG: LSHR
 | 
			
		||||
@@ -71,7 +71,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC: @fp_to_sint_v2i64
 | 
			
		||||
; FUNC: {{^}}fp_to_sint_v2i64:
 | 
			
		||||
; EG-DAG: AND_INT
 | 
			
		||||
; EG-DAG: LSHR
 | 
			
		||||
; EG-DAG: SUB_INT
 | 
			
		||||
@@ -122,7 +122,7 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC: @fp_to_sint_v4i64
 | 
			
		||||
; FUNC: {{^}}fp_to_sint_v4i64:
 | 
			
		||||
; EG-DAG: AND_INT
 | 
			
		||||
; EG-DAG: LSHR
 | 
			
		||||
; EG-DAG: SUB_INT
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @fp_to_uint_i32_f64
 | 
			
		||||
; SI-LABEL: {{^}}fp_to_uint_i32_f64:
 | 
			
		||||
; SI: V_CVT_U32_F64_e32
 | 
			
		||||
define void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) {
 | 
			
		||||
  %cast = fptoui double %in to i32
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fp_to_uint_i32
 | 
			
		||||
; FUNC-LABEL: {{^}}fp_to_uint_i32:
 | 
			
		||||
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 | 
			
		||||
; SI: V_CVT_U32_F32_e32
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -11,7 +11,7 @@ define void @fp_to_uint_i32 (i32 addrspace(1)* %out, float %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fp_to_uint_v2i32
 | 
			
		||||
; FUNC-LABEL: {{^}}fp_to_uint_v2i32:
 | 
			
		||||
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 | 
			
		||||
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
; SI: V_CVT_U32_F32_e32
 | 
			
		||||
@@ -23,7 +23,7 @@ define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fp_to_uint_v4i32
 | 
			
		||||
; FUNC-LABEL: {{^}}fp_to_uint_v4i32:
 | 
			
		||||
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 | 
			
		||||
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 | 
			
		||||
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 | 
			
		||||
@@ -40,7 +40,7 @@ define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC: @fp_to_uint_i64
 | 
			
		||||
; FUNC: {{^}}fp_to_uint_i64:
 | 
			
		||||
; EG-DAG: AND_INT
 | 
			
		||||
; EG-DAG: LSHR
 | 
			
		||||
; EG-DAG: SUB_INT
 | 
			
		||||
@@ -70,7 +70,7 @@ define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC: @fp_to_uint_v2i64
 | 
			
		||||
; FUNC: {{^}}fp_to_uint_v2i64:
 | 
			
		||||
; EG-DAG: AND_INT
 | 
			
		||||
; EG-DAG: LSHR
 | 
			
		||||
; EG-DAG: SUB_INT
 | 
			
		||||
@@ -121,7 +121,7 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC: @fp_to_uint_v4i64
 | 
			
		||||
; FUNC: {{^}}fp_to_uint_v4i64:
 | 
			
		||||
; EG-DAG: AND_INT
 | 
			
		||||
; EG-DAG: LSHR
 | 
			
		||||
; EG-DAG: SUB_INT
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
 | 
			
		||||
 | 
			
		||||
; CHECK: @fpext
 | 
			
		||||
; CHECK: {{^}}fpext:
 | 
			
		||||
; CHECK: V_CVT_F64_F32_e32
 | 
			
		||||
define void @fpext(double addrspace(1)* %out, float %in) {
 | 
			
		||||
  %result = fpext float %in to double
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
 | 
			
		||||
 | 
			
		||||
; CHECK: @fptrunc
 | 
			
		||||
; CHECK: {{^}}fptrunc:
 | 
			
		||||
; CHECK: V_CVT_F32_F64_e32
 | 
			
		||||
define void @fptrunc(float addrspace(1)* %out, double %in) {
 | 
			
		||||
  %result = fptrunc double %in to float
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @frem_f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}frem_f32:
 | 
			
		||||
; SI-DAG: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{.*$}}
 | 
			
		||||
; SI-DAG: BUFFER_LOAD_DWORD [[Y:v[0-9]+]], {{.*}} offset:0x10
 | 
			
		||||
; SI-DAG: V_CMP
 | 
			
		||||
@@ -21,7 +21,7 @@ define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @unsafe_frem_f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}unsafe_frem_f32:
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[Y:v[0-9]+]], {{.*}} offset:0x10
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{.*}}
 | 
			
		||||
; SI: V_RCP_F32_e32 [[INVY:v[0-9]+]], [[Y]]
 | 
			
		||||
@@ -43,7 +43,7 @@ define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
 | 
			
		||||
; TODO: This should check something when f64 fdiv is implemented
 | 
			
		||||
; correctly
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @frem_f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}frem_f64:
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
                      double addrspace(1)* %in2) #0 {
 | 
			
		||||
@@ -54,7 +54,7 @@ define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @unsafe_frem_f64:
 | 
			
		||||
; FUNC-LABEL: {{^}}unsafe_frem_f64:
 | 
			
		||||
; SI: V_RCP_F64_e32
 | 
			
		||||
; SI: V_MUL_F64
 | 
			
		||||
; SI: V_BFE_I32
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; CHECK: @fsqrt_f32
 | 
			
		||||
; CHECK: {{^}}fsqrt_f32:
 | 
			
		||||
; CHECK: V_SQRT_F32_e32 {{v[0-9]+, v[0-9]+}}
 | 
			
		||||
 | 
			
		||||
define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
 | 
			
		||||
@@ -10,7 +10,7 @@ define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
 | 
			
		||||
   ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK: @fsqrt_f64
 | 
			
		||||
; CHECK: {{^}}fsqrt_f64:
 | 
			
		||||
; CHECK: V_SQRT_F64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 | 
			
		||||
 | 
			
		||||
define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_fsub_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}v_fsub_f32:
 | 
			
		||||
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
 | 
			
		||||
define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
 | 
			
		||||
  %b_ptr = getelementptr float addrspace(1)* %in, i32 1
 | 
			
		||||
@@ -13,7 +13,7 @@ define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @s_fsub_f32
 | 
			
		||||
; FUNC-LABEL: {{^}}s_fsub_f32:
 | 
			
		||||
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W
 | 
			
		||||
 | 
			
		||||
; SI: V_SUB_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 | 
			
		||||
@@ -27,7 +27,7 @@ declare float @llvm.R600.load.input(i32) readnone
 | 
			
		||||
 | 
			
		||||
declare void @llvm.AMDGPU.store.output(float, i32)
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @fsub_v2f32
 | 
			
		||||
; FUNC-LABEL: {{^}}fsub_v2f32:
 | 
			
		||||
; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
 | 
			
		||||
; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
 | 
			
		||||
 | 
			
		||||
@@ -40,7 +40,7 @@ define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x flo
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_fsub_v4f32
 | 
			
		||||
; FUNC-LABEL: {{^}}v_fsub_v4f32:
 | 
			
		||||
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
 | 
			
		||||
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
 | 
			
		||||
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
 | 
			
		||||
@@ -61,7 +61,7 @@ define void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(
 | 
			
		||||
 | 
			
		||||
; FIXME: Should be using SGPR directly for first operand
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @s_fsub_v4f32
 | 
			
		||||
; FUNC-LABEL: {{^}}s_fsub_v4f32:
 | 
			
		||||
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
 | 
			
		||||
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
 | 
			
		||||
; SI: V_SUBREV_F32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @fsub_f64:
 | 
			
		||||
; SI-LABEL: {{^}}fsub_f64:
 | 
			
		||||
; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 | 
			
		||||
define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
 | 
			
		||||
                      double addrspace(1)* %in2) {
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,7 @@ declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
 | 
			
		||||
declare <8 x float> @llvm.trunc.v8f32(<8 x float>) nounwind readnone
 | 
			
		||||
declare <16 x float> @llvm.trunc.v16f32(<16 x float>) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @ftrunc_f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}ftrunc_f32:
 | 
			
		||||
; EG: TRUNC
 | 
			
		||||
; SI: V_TRUNC_F32_e32
 | 
			
		||||
define void @ftrunc_f32(float addrspace(1)* %out, float %x) {
 | 
			
		||||
@@ -17,7 +17,7 @@ define void @ftrunc_f32(float addrspace(1)* %out, float %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @ftrunc_v2f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}ftrunc_v2f32:
 | 
			
		||||
; EG: TRUNC
 | 
			
		||||
; EG: TRUNC
 | 
			
		||||
; SI: V_TRUNC_F32_e32
 | 
			
		||||
@@ -28,7 +28,7 @@ define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FIXME-FUNC-LABEL: @ftrunc_v3f32:
 | 
			
		||||
; FIXME-FUNC-LABEL: {{^}}ftrunc_v3f32:
 | 
			
		||||
; FIXME-EG: TRUNC
 | 
			
		||||
; FIXME-EG: TRUNC
 | 
			
		||||
; FIXME-EG: TRUNC
 | 
			
		||||
@@ -41,7 +41,7 @@ define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
 | 
			
		||||
;   ret void
 | 
			
		||||
; }
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @ftrunc_v4f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}ftrunc_v4f32:
 | 
			
		||||
; EG: TRUNC
 | 
			
		||||
; EG: TRUNC
 | 
			
		||||
; EG: TRUNC
 | 
			
		||||
@@ -56,7 +56,7 @@ define void @ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @ftrunc_v8f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}ftrunc_v8f32:
 | 
			
		||||
; EG: TRUNC
 | 
			
		||||
; EG: TRUNC
 | 
			
		||||
; EG: TRUNC
 | 
			
		||||
@@ -79,7 +79,7 @@ define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @ftrunc_v16f32:
 | 
			
		||||
; FUNC-LABEL: {{^}}ftrunc_v16f32:
 | 
			
		||||
; EG: TRUNC
 | 
			
		||||
; EG: TRUNC
 | 
			
		||||
; EG: TRUNC
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s
 | 
			
		||||
 | 
			
		||||
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
 | 
			
		||||
; CHECK-LABEL: @use_gep_address_space:
 | 
			
		||||
; CHECK-LABEL: {{^}}use_gep_address_space:
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
 | 
			
		||||
; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}}, 0x40
 | 
			
		||||
  %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
 | 
			
		||||
@@ -11,7 +11,7 @@ define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) nounwind {
 | 
			
		||||
; CHECK-LABEL: @use_gep_address_space_large_offset:
 | 
			
		||||
; CHECK-LABEL: {{^}}use_gep_address_space_large_offset:
 | 
			
		||||
; The LDS offset will be 65536 bytes, which is larger than the size of LDS on
 | 
			
		||||
; SI, which is why it is being OR'd with the base pointer.
 | 
			
		||||
; SI: S_OR_B32
 | 
			
		||||
@@ -23,7 +23,7 @@ define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %arra
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind {
 | 
			
		||||
; CHECK-LABEL: @gep_as_vector_v4:
 | 
			
		||||
; CHECK-LABEL: {{^}}gep_as_vector_v4:
 | 
			
		||||
; CHECK: S_ADD_I32
 | 
			
		||||
; CHECK: S_ADD_I32
 | 
			
		||||
; CHECK: S_ADD_I32
 | 
			
		||||
@@ -41,7 +41,7 @@ define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind {
 | 
			
		||||
; CHECK-LABEL: @gep_as_vector_v2:
 | 
			
		||||
; CHECK-LABEL: {{^}}gep_as_vector_v2:
 | 
			
		||||
; CHECK: S_ADD_I32
 | 
			
		||||
; CHECK: S_ADD_I32
 | 
			
		||||
  %p = getelementptr <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> <i16 16, i16 16>
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @atomic_load_i32_offset
 | 
			
		||||
; FUNC-LABEL: {{^}}atomic_load_i32_offset:
 | 
			
		||||
; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 | 
			
		||||
define void @atomic_load_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -8,7 +8,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @atomic_load_i32_ret_offset
 | 
			
		||||
; FUNC-LABEL: {{^}}atomic_load_i32_ret_offset:
 | 
			
		||||
; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[RET]]
 | 
			
		||||
define void @atomic_load_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 | 
			
		||||
@@ -18,7 +18,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @atomic_load_i32_addr64
 | 
			
		||||
; FUNC-LABEL: {{^}}atomic_load_i32_addr64:
 | 
			
		||||
; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
 | 
			
		||||
define void @atomic_load_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -27,7 +27,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @atomic_load_i32_ret_addr64
 | 
			
		||||
; FUNC-LABEL: {{^}}atomic_load_i32_ret_addr64:
 | 
			
		||||
; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[RET]]
 | 
			
		||||
define void @atomic_load_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@
 | 
			
		||||
 | 
			
		||||
@a = internal addrspace(2) constant [1 x i8] [ i8 7 ], align 1
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_i8
 | 
			
		||||
; FUNC-LABEL: {{^}}test_i8:
 | 
			
		||||
; EG: CF_END
 | 
			
		||||
; SI: BUFFER_STORE_BYTE
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -17,7 +17,7 @@ define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
 | 
			
		||||
 | 
			
		||||
@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_i16
 | 
			
		||||
; FUNC-LABEL: {{^}}test_i16:
 | 
			
		||||
; EG: CF_END
 | 
			
		||||
; SI: BUFFER_STORE_SHORT
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -33,7 +33,7 @@ define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
 | 
			
		||||
; The illegal i8s aren't handled
 | 
			
		||||
@struct_bar_gv = internal addrspace(2) constant [1 x %struct.bar] [ %struct.bar { float 16.0, [5 x i8] [i8 0, i8 1, i8 2, i8 3, i8 4] } ]
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @struct_bar_gv_load
 | 
			
		||||
; FUNC-LABEL: {{^}}struct_bar_gv_load:
 | 
			
		||||
define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) {
 | 
			
		||||
  %gep = getelementptr inbounds [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index
 | 
			
		||||
  %load = load i8 addrspace(2)* %gep, align 1
 | 
			
		||||
@@ -48,7 +48,7 @@ define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) {
 | 
			
		||||
                                                                    <4 x i32> <i32 9, i32 10, i32 11, i32 12>,
 | 
			
		||||
                                                                    <4 x i32> <i32 13, i32 14, i32 15, i32 16> ]
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @array_vector_gv_load
 | 
			
		||||
; FUNC-LABEL: {{^}}array_vector_gv_load:
 | 
			
		||||
define void @array_vector_gv_load(<4 x i32> addrspace(1)* %out, i32 %index) {
 | 
			
		||||
  %gep = getelementptr inbounds [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index
 | 
			
		||||
  %load = load <4 x i32> addrspace(2)* %gep, align 16
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
 | 
			
		||||
@float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @float
 | 
			
		||||
; FUNC-LABEL: {{^}}float:
 | 
			
		||||
; FIXME: We should be using S_LOAD_DWORD here.
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD
 | 
			
		||||
 | 
			
		||||
@@ -27,7 +27,7 @@ entry:
 | 
			
		||||
 | 
			
		||||
@i32_gv = internal unnamed_addr addrspace(2) constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @i32
 | 
			
		||||
; FUNC-LABEL: {{^}}i32:
 | 
			
		||||
 | 
			
		||||
; FIXME: We should be using S_LOAD_DWORD here.
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD
 | 
			
		||||
@@ -52,7 +52,7 @@ entry:
 | 
			
		||||
 | 
			
		||||
@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @struct_foo_gv_load
 | 
			
		||||
; FUNC-LABEL: {{^}}struct_foo_gv_load:
 | 
			
		||||
; SI: S_LOAD_DWORD
 | 
			
		||||
 | 
			
		||||
define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
 | 
			
		||||
@@ -67,7 +67,7 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
 | 
			
		||||
                                                                <1 x i32> <i32 3>,
 | 
			
		||||
                                                                <1 x i32> <i32 4> ]
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @array_v1_gv_load
 | 
			
		||||
; FUNC-LABEL: {{^}}array_v1_gv_load:
 | 
			
		||||
; FIXME: We should be using S_LOAD_DWORD here.
 | 
			
		||||
; SI: BUFFER_LOAD_DWORD
 | 
			
		||||
define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
 | 
			
		||||
 | 
			
		||||
define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
 | 
			
		||||
; CHECK-LABEL: @test_load_store
 | 
			
		||||
; CHECK-LABEL: {{^}}test_load_store:
 | 
			
		||||
; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
 | 
			
		||||
; CHECK: BUFFER_STORE_SHORT [[TMP]]
 | 
			
		||||
  %val = load half addrspace(1)* %in
 | 
			
		||||
@@ -10,7 +10,7 @@ define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) {
 | 
			
		||||
; CHECK-LABEL: @test_bitcast_from_half
 | 
			
		||||
; CHECK-LABEL: {{^}}test_bitcast_from_half:
 | 
			
		||||
; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
 | 
			
		||||
; CHECK: BUFFER_STORE_SHORT [[TMP]]
 | 
			
		||||
  %val = load half addrspace(1) * %in
 | 
			
		||||
@@ -20,7 +20,7 @@ define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %o
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) {
 | 
			
		||||
; CHECK-LABEL: @test_bitcast_to_half
 | 
			
		||||
; CHECK-LABEL: {{^}}test_bitcast_to_half:
 | 
			
		||||
; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
 | 
			
		||||
; CHECK: BUFFER_STORE_SHORT [[TMP]]
 | 
			
		||||
  %val = load i16 addrspace(1)* %in
 | 
			
		||||
@@ -30,7 +30,7 @@ define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
 | 
			
		||||
; CHECK-LABEL: @test_extend32
 | 
			
		||||
; CHECK-LABEL: {{^}}test_extend32:
 | 
			
		||||
; CHECK: V_CVT_F32_F16_e32
 | 
			
		||||
 | 
			
		||||
  %val16 = load half addrspace(1)* %in
 | 
			
		||||
@@ -40,7 +40,7 @@ define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
 | 
			
		||||
; CHECK-LABEL: @test_extend64
 | 
			
		||||
; CHECK-LABEL: {{^}}test_extend64:
 | 
			
		||||
; CHECK: V_CVT_F32_F16_e32
 | 
			
		||||
; CHECK: V_CVT_F64_F32_e32
 | 
			
		||||
 | 
			
		||||
@@ -51,7 +51,7 @@ define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define void @test_trunc32(float addrspace(1)* %in, half addrspace(1)* %out) {
 | 
			
		||||
; CHECK-LABEL: @test_trunc32
 | 
			
		||||
; CHECK-LABEL: {{^}}test_trunc32:
 | 
			
		||||
; CHECK: V_CVT_F16_F32_e32
 | 
			
		||||
 | 
			
		||||
  %val32 = load float addrspace(1)* %in
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_i64_eq:
 | 
			
		||||
; SI-LABEL: {{^}}test_i64_eq:
 | 
			
		||||
; SI: V_CMP_EQ_I64
 | 
			
		||||
define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  %cmp = icmp eq i64 %a, %b
 | 
			
		||||
@@ -9,7 +9,7 @@ define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_i64_ne:
 | 
			
		||||
; SI-LABEL: {{^}}test_i64_ne:
 | 
			
		||||
; SI: V_CMP_NE_I64
 | 
			
		||||
define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  %cmp = icmp ne i64 %a, %b
 | 
			
		||||
@@ -18,7 +18,7 @@ define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_i64_slt:
 | 
			
		||||
; SI-LABEL: {{^}}test_i64_slt:
 | 
			
		||||
; SI: V_CMP_LT_I64
 | 
			
		||||
define void @test_i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  %cmp = icmp slt i64 %a, %b
 | 
			
		||||
@@ -27,7 +27,7 @@ define void @test_i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_i64_ult:
 | 
			
		||||
; SI-LABEL: {{^}}test_i64_ult:
 | 
			
		||||
; SI: V_CMP_LT_U64
 | 
			
		||||
define void @test_i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  %cmp = icmp ult i64 %a, %b
 | 
			
		||||
@@ -36,7 +36,7 @@ define void @test_i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_i64_sle:
 | 
			
		||||
; SI-LABEL: {{^}}test_i64_sle:
 | 
			
		||||
; SI: V_CMP_LE_I64
 | 
			
		||||
define void @test_i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  %cmp = icmp sle i64 %a, %b
 | 
			
		||||
@@ -45,7 +45,7 @@ define void @test_i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_i64_ule:
 | 
			
		||||
; SI-LABEL: {{^}}test_i64_ule:
 | 
			
		||||
; SI: V_CMP_LE_U64
 | 
			
		||||
define void @test_i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  %cmp = icmp ule i64 %a, %b
 | 
			
		||||
@@ -54,7 +54,7 @@ define void @test_i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_i64_sgt:
 | 
			
		||||
; SI-LABEL: {{^}}test_i64_sgt:
 | 
			
		||||
; SI: V_CMP_GT_I64
 | 
			
		||||
define void @test_i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  %cmp = icmp sgt i64 %a, %b
 | 
			
		||||
@@ -63,7 +63,7 @@ define void @test_i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_i64_ugt:
 | 
			
		||||
; SI-LABEL: {{^}}test_i64_ugt:
 | 
			
		||||
; SI: V_CMP_GT_U64
 | 
			
		||||
define void @test_i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  %cmp = icmp ugt i64 %a, %b
 | 
			
		||||
@@ -72,7 +72,7 @@ define void @test_i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_i64_sge:
 | 
			
		||||
; SI-LABEL: {{^}}test_i64_sge:
 | 
			
		||||
; SI: V_CMP_GE_I64
 | 
			
		||||
define void @test_i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  %cmp = icmp sge i64 %a, %b
 | 
			
		||||
@@ -81,7 +81,7 @@ define void @test_i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @test_i64_uge:
 | 
			
		||||
; SI-LABEL: {{^}}test_i64_uge:
 | 
			
		||||
; SI: V_CMP_GE_U64
 | 
			
		||||
define void @test_i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
 | 
			
		||||
  %cmp = icmp uge i64 %a, %b
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; Use a 64-bit value with lo bits that can be represented as an inline constant
 | 
			
		||||
; CHECK-LABEL: @i64_imm_inline_lo
 | 
			
		||||
; CHECK-LABEL: {{^}}i64_imm_inline_lo:
 | 
			
		||||
; CHECK: S_MOV_B32 [[LO:s[0-9]+]], 5
 | 
			
		||||
; CHECK: V_MOV_B32_e32 v[[LO_VGPR:[0-9]+]], [[LO]]
 | 
			
		||||
; CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[LO_VGPR]]:
 | 
			
		||||
@@ -12,7 +12,7 @@ entry:
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; Use a 64-bit value with hi bits that can be represented as an inline constant
 | 
			
		||||
; CHECK-LABEL: @i64_imm_inline_hi
 | 
			
		||||
; CHECK-LABEL: {{^}}i64_imm_inline_hi:
 | 
			
		||||
; CHECK: S_MOV_B32 [[HI:s[0-9]+]], 5
 | 
			
		||||
; CHECK: V_MOV_B32_e32 v[[HI_VGPR:[0-9]+]], [[HI]]
 | 
			
		||||
; CHECK: BUFFER_STORE_DWORDX2 v{{\[[0-9]+:}}[[HI_VGPR]]
 | 
			
		||||
@@ -22,7 +22,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_inline_imm_0.0_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}store_inline_imm_0.0_f32
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
define void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
 | 
			
		||||
@@ -30,7 +30,7 @@ define void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_inline_imm_0.5_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0.5{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
define void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
 | 
			
		||||
@@ -38,7 +38,7 @@ define void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_inline_imm_m_0.5_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f32
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -0.5{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
define void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
 | 
			
		||||
@@ -46,7 +46,7 @@ define void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_inline_imm_1.0_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}store_inline_imm_1.0_f32
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 1.0{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
define void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
 | 
			
		||||
@@ -54,7 +54,7 @@ define void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_inline_imm_m_1.0_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f32
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -1.0{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
define void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
 | 
			
		||||
@@ -62,7 +62,7 @@ define void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_inline_imm_2.0_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}store_inline_imm_2.0_f32
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 2.0{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
define void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
 | 
			
		||||
@@ -70,7 +70,7 @@ define void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_inline_imm_m_2.0_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f32
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -2.0{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
define void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
 | 
			
		||||
@@ -78,7 +78,7 @@ define void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_inline_imm_4.0_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}store_inline_imm_4.0_f32
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 4.0{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
define void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
 | 
			
		||||
@@ -86,7 +86,7 @@ define void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_inline_imm_m_4.0_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f32
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], -4.0{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
 | 
			
		||||
@@ -94,7 +94,7 @@ define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @store_literal_imm_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}store_literal_imm_f32:
 | 
			
		||||
; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x45800000
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
define void @store_literal_imm_f32(float addrspace(1)* %out) {
 | 
			
		||||
@@ -102,7 +102,7 @@ define void @store_literal_imm_f32(float addrspace(1)* %out) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @add_inline_imm_0.0_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32
 | 
			
		||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
 | 
			
		||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 0.0{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
@@ -112,7 +112,7 @@ define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @add_inline_imm_0.5_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32
 | 
			
		||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
 | 
			
		||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 0.5{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
@@ -122,7 +122,7 @@ define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @add_inline_imm_neg_0.5_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32
 | 
			
		||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
 | 
			
		||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -0.5{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
@@ -132,7 +132,7 @@ define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @add_inline_imm_1.0_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32
 | 
			
		||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
 | 
			
		||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 1.0{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
@@ -142,7 +142,7 @@ define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @add_inline_imm_neg_1.0_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32
 | 
			
		||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
 | 
			
		||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -1.0{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
@@ -152,7 +152,7 @@ define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @add_inline_imm_2.0_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32
 | 
			
		||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
 | 
			
		||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 2.0{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
@@ -162,7 +162,7 @@ define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @add_inline_imm_neg_2.0_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32
 | 
			
		||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
 | 
			
		||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -2.0{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
@@ -172,7 +172,7 @@ define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @add_inline_imm_4.0_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32
 | 
			
		||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
 | 
			
		||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 4.0{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
@@ -182,7 +182,7 @@ define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @add_inline_imm_neg_4.0_f32
 | 
			
		||||
; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32
 | 
			
		||||
; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]]
 | 
			
		||||
; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -4.0{{$}}
 | 
			
		||||
; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@
 | 
			
		||||
 | 
			
		||||
declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @private_access_f64_alloca:
 | 
			
		||||
; SI-LABEL: {{^}}private_access_f64_alloca:
 | 
			
		||||
 | 
			
		||||
; SI-ALLOCA: BUFFER_STORE_DWORDX2
 | 
			
		||||
; SI-ALLOCA: BUFFER_LOAD_DWORDX2
 | 
			
		||||
@@ -22,7 +22,7 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @private_access_v2f64_alloca:
 | 
			
		||||
; SI-LABEL: {{^}}private_access_v2f64_alloca:
 | 
			
		||||
 | 
			
		||||
; SI-ALLOCA: BUFFER_STORE_DWORDX4
 | 
			
		||||
; SI-ALLOCA: BUFFER_LOAD_DWORDX4
 | 
			
		||||
@@ -46,7 +46,7 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @private_access_i64_alloca:
 | 
			
		||||
; SI-LABEL: {{^}}private_access_i64_alloca:
 | 
			
		||||
 | 
			
		||||
; SI-ALLOCA: BUFFER_STORE_DWORDX2
 | 
			
		||||
; SI-ALLOCA: BUFFER_LOAD_DWORDX2
 | 
			
		||||
@@ -64,7 +64,7 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @private_access_v2i64_alloca:
 | 
			
		||||
; SI-LABEL: {{^}}private_access_v2i64_alloca:
 | 
			
		||||
 | 
			
		||||
; SI-ALLOCA: BUFFER_STORE_DWORDX4
 | 
			
		||||
; SI-ALLOCA: BUFFER_LOAD_DWORDX4
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @infinite_loop:
 | 
			
		||||
; SI-LABEL: {{^}}infinite_loop:
 | 
			
		||||
; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x3e7
 | 
			
		||||
; SI: BB0_1:
 | 
			
		||||
; SI: BUFFER_STORE_DWORD [[REG]]
 | 
			
		||||
 
 | 
			
		||||
@@ -1,9 +1,9 @@
 | 
			
		||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
 | 
			
		||||
;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK
 | 
			
		||||
 | 
			
		||||
;EG-CHECK-LABEL: @test
 | 
			
		||||
;EG-CHECK-LABEL: {{^}}test:
 | 
			
		||||
;EG-CHECK: EXP_IEEE *
 | 
			
		||||
;CM-CHECK-LABEL: @test
 | 
			
		||||
;CM-CHECK-LABEL: {{^}}test:
 | 
			
		||||
;CM-CHECK: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X|
 | 
			
		||||
;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X|
 | 
			
		||||
;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X|
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,7 @@
 | 
			
		||||
; FIXME: Why is the constant moved into the intermediate register and
 | 
			
		||||
; not just directly into the vector component?
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @insertelement_v4f32_0:
 | 
			
		||||
; SI-LABEL: {{^}}insertelement_v4f32_0:
 | 
			
		||||
; S_LOAD_DWORDX4 s{{[}}[[LOW_REG:[0-9]+]]:
 | 
			
		||||
; V_MOV_B32_e32
 | 
			
		||||
; V_MOV_B32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
 | 
			
		||||
@@ -20,35 +20,35 @@ define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @insertelement_v4f32_1:
 | 
			
		||||
; SI-LABEL: {{^}}insertelement_v4f32_1:
 | 
			
		||||
define void @insertelement_v4f32_1(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
 | 
			
		||||
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 1
 | 
			
		||||
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @insertelement_v4f32_2:
 | 
			
		||||
; SI-LABEL: {{^}}insertelement_v4f32_2:
 | 
			
		||||
define void @insertelement_v4f32_2(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
 | 
			
		||||
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 2
 | 
			
		||||
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @insertelement_v4f32_3:
 | 
			
		||||
; SI-LABEL: {{^}}insertelement_v4f32_3:
 | 
			
		||||
define void @insertelement_v4f32_3(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
 | 
			
		||||
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 3
 | 
			
		||||
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @insertelement_v4i32_0:
 | 
			
		||||
; SI-LABEL: {{^}}insertelement_v4i32_0:
 | 
			
		||||
define void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) nounwind {
 | 
			
		||||
  %vecins = insertelement <4 x i32> %a, i32 999, i32 0
 | 
			
		||||
  store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v2f32:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v2f32:
 | 
			
		||||
; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 0x40a00000
 | 
			
		||||
; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[LOW_RESULT_REG]]:
 | 
			
		||||
@@ -58,7 +58,7 @@ define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x fl
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v4f32:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v4f32:
 | 
			
		||||
; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 0x40a00000
 | 
			
		||||
; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX4 {{v\[}}[[LOW_RESULT_REG]]:
 | 
			
		||||
@@ -68,7 +68,7 @@ define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x fl
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v8f32:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v8f32:
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORDX4
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORDX4
 | 
			
		||||
define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
 | 
			
		||||
@@ -77,7 +77,7 @@ define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x fl
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v16f32:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v16f32:
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORDX4
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORDX4
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORDX4
 | 
			
		||||
@@ -88,7 +88,7 @@ define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v2i32:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v2i32:
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
 | 
			
		||||
  %vecins = insertelement <2 x i32> %a, i32 5, i32 %b
 | 
			
		||||
@@ -96,7 +96,7 @@ define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v4i32:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v4i32:
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX4
 | 
			
		||||
define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b) nounwind {
 | 
			
		||||
  %vecins = insertelement <4 x i32> %a, i32 5, i32 %b
 | 
			
		||||
@@ -104,7 +104,7 @@ define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v8i32:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v8i32:
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORDX4
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORDX4
 | 
			
		||||
define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
 | 
			
		||||
@@ -113,7 +113,7 @@ define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v16i32:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v16i32:
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORDX4
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORDX4
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORDX4
 | 
			
		||||
@@ -125,7 +125,7 @@ define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v2i16:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v2i16:
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORDX2
 | 
			
		||||
define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
 | 
			
		||||
  %vecins = insertelement <2 x i16> %a, i16 5, i32 %b
 | 
			
		||||
@@ -133,7 +133,7 @@ define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v4i16:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v4i16:
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORDX4
 | 
			
		||||
define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind {
 | 
			
		||||
  %vecins = insertelement <4 x i16> %a, i16 5, i32 %b
 | 
			
		||||
@@ -142,7 +142,7 @@ define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16>
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v2i8:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v2i8:
 | 
			
		||||
; FIXMESI: BUFFER_STORE_USHORT
 | 
			
		||||
define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a, i32 %b) nounwind {
 | 
			
		||||
  %vecins = insertelement <2 x i8> %a, i8 5, i32 %b
 | 
			
		||||
@@ -150,7 +150,7 @@ define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v4i8:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v4i8:
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORD
 | 
			
		||||
define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind {
 | 
			
		||||
  %vecins = insertelement <4 x i8> %a, i8 5, i32 %b
 | 
			
		||||
@@ -158,7 +158,7 @@ define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v8i8:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v8i8:
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORDX2
 | 
			
		||||
define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind {
 | 
			
		||||
  %vecins = insertelement <8 x i8> %a, i8 5, i32 %b
 | 
			
		||||
@@ -166,7 +166,7 @@ define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v16i8:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v16i8:
 | 
			
		||||
; FIXMESI: BUFFER_STORE_DWORDX4
 | 
			
		||||
define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
 | 
			
		||||
  %vecins = insertelement <16 x i8> %a, i8 5, i32 %b
 | 
			
		||||
@@ -176,7 +176,7 @@ define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8>
 | 
			
		||||
 | 
			
		||||
; This test requires handling INSERT_SUBREG in SIFixSGPRCopies.  Check that
 | 
			
		||||
; the compiler doesn't crash.
 | 
			
		||||
; SI-LABEL: @insert_split_bb
 | 
			
		||||
; SI-LABEL: {{^}}insert_split_bb:
 | 
			
		||||
define void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) {
 | 
			
		||||
entry:
 | 
			
		||||
  %0 = insertelement <2 x i32> undef, i32 %a, i32 0
 | 
			
		||||
@@ -200,7 +200,7 @@ endif:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v2f64:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v2f64:
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
@@ -212,7 +212,7 @@ define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x d
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v2i64:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v2i64:
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -222,7 +222,7 @@ define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64>
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v4f64:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v4f64:
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
@@ -234,7 +234,7 @@ define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x d
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SI-LABEL: @dynamic_insertelement_v8f64:
 | 
			
		||||
; SI-LABEL: {{^}}dynamic_insertelement_v8f64:
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 | 
			
		||||
 | 
			
		||||
; CHECK: @main1
 | 
			
		||||
; CHECK: {{^}}main1:
 | 
			
		||||
; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}}
 | 
			
		||||
define void @main1() {
 | 
			
		||||
main_body:
 | 
			
		||||
@@ -48,7 +48,7 @@ main_body:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK: @main2
 | 
			
		||||
; CHECK: {{^}}main2:
 | 
			
		||||
; CHECK-NOT: MOV
 | 
			
		||||
define void @main2() {
 | 
			
		||||
main_body:
 | 
			
		||||
 
 | 
			
		||||
@@ -2,9 +2,9 @@
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
 | 
			
		||||
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @i8_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}i8_arg:
 | 
			
		||||
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 | 
			
		||||
; SI-CHECK-LABEL: @i8_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}i8_arg:
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_UBYTE
 | 
			
		||||
 | 
			
		||||
define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
 | 
			
		||||
@@ -14,9 +14,9 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @i8_zext_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}i8_zext_arg:
 | 
			
		||||
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 | 
			
		||||
; SI-CHECK-LABEL: @i8_zext_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}i8_zext_arg:
 | 
			
		||||
; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
 | 
			
		||||
 | 
			
		||||
define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
 | 
			
		||||
@@ -26,9 +26,9 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @i8_sext_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}i8_sext_arg:
 | 
			
		||||
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 | 
			
		||||
; SI-CHECK-LABEL: @i8_sext_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}i8_sext_arg:
 | 
			
		||||
; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
 | 
			
		||||
 | 
			
		||||
define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
 | 
			
		||||
@@ -38,9 +38,9 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @i16_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}i16_arg:
 | 
			
		||||
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 | 
			
		||||
; SI-CHECK-LABEL: @i16_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}i16_arg:
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_USHORT
 | 
			
		||||
 | 
			
		||||
define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
 | 
			
		||||
@@ -50,9 +50,9 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @i16_zext_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}i16_zext_arg:
 | 
			
		||||
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 | 
			
		||||
; SI-CHECK-LABEL: @i16_zext_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}i16_zext_arg:
 | 
			
		||||
; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
 | 
			
		||||
 | 
			
		||||
define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
 | 
			
		||||
@@ -62,9 +62,9 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @i16_sext_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}i16_sext_arg:
 | 
			
		||||
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 | 
			
		||||
; SI-CHECK-LABEL: @i16_sext_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}i16_sext_arg:
 | 
			
		||||
; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
 | 
			
		||||
 | 
			
		||||
define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
 | 
			
		||||
@@ -74,9 +74,9 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @i32_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}i32_arg:
 | 
			
		||||
; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
 | 
			
		||||
; SI-CHECK-LABEL: @i32_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}i32_arg:
 | 
			
		||||
; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
 | 
			
		||||
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -84,9 +84,9 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @f32_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}f32_arg:
 | 
			
		||||
; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
 | 
			
		||||
; SI-CHECK-LABEL: @f32_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}f32_arg:
 | 
			
		||||
; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
 | 
			
		||||
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -94,10 +94,10 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v2i8_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v2i8_arg:
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; SI-CHECK-LABEL: @v2i8_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v2i8_arg:
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_UBYTE
 | 
			
		||||
define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
 | 
			
		||||
@@ -106,10 +106,10 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v2i16_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v2i16_arg:
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; SI-CHECK-LABEL: @v2i16_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v2i16_arg:
 | 
			
		||||
; SI-CHECK-DAG: BUFFER_LOAD_USHORT
 | 
			
		||||
; SI-CHECK-DAG: BUFFER_LOAD_USHORT
 | 
			
		||||
define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
 | 
			
		||||
@@ -118,10 +118,10 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v2i32_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v2i32_arg:
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
 | 
			
		||||
; SI-CHECK-LABEL: @v2i32_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v2i32_arg:
 | 
			
		||||
; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
 | 
			
		||||
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -129,10 +129,10 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v2f32_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v2f32_arg:
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
 | 
			
		||||
; SI-CHECK-LABEL: @v2f32_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v2f32_arg:
 | 
			
		||||
; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
 | 
			
		||||
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -140,32 +140,32 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v3i8_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v3i8_arg:
 | 
			
		||||
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
 | 
			
		||||
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
 | 
			
		||||
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
 | 
			
		||||
; SI-CHECK-LABEL: @v3i8_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v3i8_arg:
 | 
			
		||||
define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v3i16_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v3i16_arg:
 | 
			
		||||
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
 | 
			
		||||
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
 | 
			
		||||
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
 | 
			
		||||
; SI-CHECK-LABEL: @v3i16_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v3i16_arg:
 | 
			
		||||
define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
; EG-CHECK-LABEL: @v3i32_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v3i32_arg:
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
 | 
			
		||||
; SI-CHECK-LABEL: @v3i32_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v3i32_arg:
 | 
			
		||||
; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
 | 
			
		||||
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -173,11 +173,11 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v3f32_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v3f32_arg:
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
 | 
			
		||||
; SI-CHECK-LABEL: @v3f32_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v3f32_arg:
 | 
			
		||||
; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
 | 
			
		||||
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -185,12 +185,12 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v4i8_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v4i8_arg:
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; SI-CHECK-LABEL: @v4i8_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v4i8_arg:
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_UBYTE
 | 
			
		||||
@@ -201,12 +201,12 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v4i16_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v4i16_arg:
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; SI-CHECK-LABEL: @v4i16_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v4i16_arg:
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_USHORT
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_USHORT
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_USHORT
 | 
			
		||||
@@ -217,12 +217,12 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v4i32_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v4i32_arg:
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
 | 
			
		||||
; SI-CHECK-LABEL: @v4i32_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v4i32_arg:
 | 
			
		||||
; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
 | 
			
		||||
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -230,12 +230,12 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v4f32_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v4f32_arg:
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
 | 
			
		||||
; SI-CHECK-LABEL: @v4f32_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v4f32_arg:
 | 
			
		||||
; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
 | 
			
		||||
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -243,7 +243,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v8i8_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v8i8_arg:
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
@@ -252,7 +252,7 @@ entry:
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; SI-CHECK-LABEL: @v8i8_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v8i8_arg:
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_UBYTE
 | 
			
		||||
@@ -266,7 +266,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v8i16_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v8i16_arg:
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
@@ -275,7 +275,7 @@ entry:
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; SI-CHECK-LABEL: @v8i16_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v8i16_arg:
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_USHORT
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_USHORT
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_USHORT
 | 
			
		||||
@@ -290,7 +290,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v8i32_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v8i32_arg:
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
 | 
			
		||||
@@ -299,7 +299,7 @@ entry:
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
 | 
			
		||||
; SI-CHECK-LABEL: @v8i32_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v8i32_arg:
 | 
			
		||||
; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
 | 
			
		||||
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -307,7 +307,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v8f32_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v8f32_arg:
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
 | 
			
		||||
@@ -316,7 +316,7 @@ entry:
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
 | 
			
		||||
; SI-CHECK-LABEL: @v8f32_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v8f32_arg:
 | 
			
		||||
; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
 | 
			
		||||
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -324,7 +324,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v16i8_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v16i8_arg:
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
@@ -341,7 +341,7 @@ entry:
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; EG-CHECK: VTX_READ_8
 | 
			
		||||
; SI-CHECK-LABEL: @v16i8_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v16i8_arg:
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_UBYTE
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_UBYTE
 | 
			
		||||
@@ -364,7 +364,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v16i16_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v16i16_arg:
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
@@ -381,7 +381,7 @@ entry:
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; EG-CHECK: VTX_READ_16
 | 
			
		||||
; SI-CHECK-LABEL: @v16i16_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v16i16_arg:
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_USHORT
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_USHORT
 | 
			
		||||
; SI-CHECK: BUFFER_LOAD_USHORT
 | 
			
		||||
@@ -404,7 +404,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v16i32_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v16i32_arg:
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
 | 
			
		||||
@@ -421,7 +421,7 @@ entry:
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
 | 
			
		||||
; SI-CHECK-LABEL: @v16i32_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v16i32_arg:
 | 
			
		||||
; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
 | 
			
		||||
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -429,7 +429,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; EG-CHECK-LABEL: @v16f32_arg
 | 
			
		||||
; EG-CHECK-LABEL: {{^}}v16f32_arg:
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
 | 
			
		||||
@@ -446,7 +446,7 @@ entry:
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
 | 
			
		||||
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
 | 
			
		||||
; SI-CHECK-LABEL: @v16f32_arg
 | 
			
		||||
; SI-CHECK-LABEL: {{^}}v16f32_arg:
 | 
			
		||||
; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
 | 
			
		||||
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
 | 
			
		||||
entry:
 | 
			
		||||
@@ -454,7 +454,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @kernel_arg_i64
 | 
			
		||||
; FUNC-LABEL: {{^}}kernel_arg_i64:
 | 
			
		||||
; SI: S_LOAD_DWORDX2
 | 
			
		||||
; SI: S_LOAD_DWORDX2
 | 
			
		||||
; SI: BUFFER_STORE_DWORDX2
 | 
			
		||||
@@ -463,7 +463,7 @@ define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; XFUNC-LABEL: @kernel_arg_v1i64
 | 
			
		||||
; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
 | 
			
		||||
; XSI: S_LOAD_DWORDX2
 | 
			
		||||
; XSI: S_LOAD_DWORDX2
 | 
			
		||||
; XSI: BUFFER_STORE_DWORDX2
 | 
			
		||||
 
 | 
			
		||||
@@ -9,7 +9,7 @@
 | 
			
		||||
; because the LDS instructions are pseudo instructions and the OQAP
 | 
			
		||||
; reads and writes are bundled together in the same instruction.
 | 
			
		||||
 | 
			
		||||
; CHECK: @lds_crash
 | 
			
		||||
; CHECK: {{^}}lds_crash:
 | 
			
		||||
define void @lds_crash(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %a, i32 %b, i32 %c) {
 | 
			
		||||
entry:
 | 
			
		||||
  %0 = load i32 addrspace(3)* %in
 | 
			
		||||
 
 | 
			
		||||
@@ -3,7 +3,7 @@
 | 
			
		||||
; This test checks that the lds input queue will is empty at the end of
 | 
			
		||||
; the ALU clause.
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @lds_input_queue
 | 
			
		||||
; CHECK-LABEL: {{^}}lds_input_queue:
 | 
			
		||||
; CHECK: LDS_READ_RET * OQAP
 | 
			
		||||
; CHECK-NOT: ALU clause
 | 
			
		||||
; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
 | 
			
		||||
@@ -84,7 +84,7 @@ declare void @llvm.AMDGPU.barrier.local()
 | 
			
		||||
; analysis, we should be able to keep these instructions sparate before
 | 
			
		||||
; scheduling.
 | 
			
		||||
;
 | 
			
		||||
; CHECK-LABEL: @local_global_alias
 | 
			
		||||
; CHECK-LABEL: {{^}}local_global_alias:
 | 
			
		||||
; CHECK: LDS_READ_RET
 | 
			
		||||
; CHECK-NOT: ALU clause
 | 
			
		||||
; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
 | 
			
		||||
 
 | 
			
		||||
@@ -3,7 +3,7 @@
 | 
			
		||||
; This test makes sure we do not double count global values when they are
 | 
			
		||||
; used in different basic blocks.
 | 
			
		||||
 | 
			
		||||
; CHECK-LABEL: @test
 | 
			
		||||
; CHECK-LABEL: {{^}}test:
 | 
			
		||||
; CHECK: .long   166120
 | 
			
		||||
; CHECK-NEXT: .long   1
 | 
			
		||||
@lds = internal unnamed_addr addrspace(3) global i32 zeroinitializer, align 4
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,7 @@
 | 
			
		||||
; instructions, when only one is needed.
 | 
			
		||||
;
 | 
			
		||||
 | 
			
		||||
; CHECK: @setcc_expand
 | 
			
		||||
; CHECK: {{^}}setcc_expand:
 | 
			
		||||
; CHECK: SET
 | 
			
		||||
; CHECK-NOT: CND
 | 
			
		||||
define void @setcc_expand(i32 addrspace(1)* %out, i32 %in) {
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@
 | 
			
		||||
; or
 | 
			
		||||
; ADD_INT literal.x KC0[2].Z, 5
 | 
			
		||||
 | 
			
		||||
; CHECK: @i32_literal
 | 
			
		||||
; CHECK: {{^}}i32_literal:
 | 
			
		||||
; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
 | 
			
		||||
; CHECK-NEXT: LSHR
 | 
			
		||||
; CHECK-NEXT: 5
 | 
			
		||||
@@ -23,7 +23,7 @@ entry:
 | 
			
		||||
; or
 | 
			
		||||
; ADD literal.x KC0[2].Z, 5.0
 | 
			
		||||
 | 
			
		||||
; CHECK: @float_literal
 | 
			
		||||
; CHECK: {{^}}float_literal:
 | 
			
		||||
; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
 | 
			
		||||
; CHECK-NEXT: LSHR
 | 
			
		||||
; CHECK-NEXT: 1084227584(5.0
 | 
			
		||||
@@ -35,7 +35,7 @@ entry:
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; Make sure inline literals are folded into REG_SEQUENCE instructions.
 | 
			
		||||
; CHECK: @inline_literal_reg_sequence
 | 
			
		||||
; CHECK: {{^}}inline_literal_reg_sequence:
 | 
			
		||||
; CHECK: MOV {{\** *}}T[[GPR:[0-9]]].X, 0.0
 | 
			
		||||
; CHECK-NEXT: MOV {{\** *}}T[[GPR]].Y, 0.0
 | 
			
		||||
; CHECK-NEXT: MOV {{\** *}}T[[GPR]].Z, 0.0
 | 
			
		||||
@@ -47,7 +47,7 @@ entry:
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK: @inline_literal_dot4
 | 
			
		||||
; CHECK: {{^}}inline_literal_dot4:
 | 
			
		||||
; CHECK: DOT4 T[[GPR:[0-9]]].X, 1.0
 | 
			
		||||
; CHECK-NEXT: DOT4 T[[GPR]].Y (MASKED), 1.0
 | 
			
		||||
; CHECK-NEXT: DOT4 T[[GPR]].Z (MASKED), 1.0
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@ declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone
 | 
			
		||||
; Legacy name
 | 
			
		||||
declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @s_abs_i32
 | 
			
		||||
; FUNC-LABEL: {{^}}s_abs_i32:
 | 
			
		||||
; SI: S_SUB_I32
 | 
			
		||||
; SI: S_MAX_I32
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -19,7 +19,7 @@ define void @s_abs_i32(i32 addrspace(1)* %out, i32 %src) nounwind {
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @v_abs_i32
 | 
			
		||||
; FUNC-LABEL: {{^}}v_abs_i32:
 | 
			
		||||
; SI: V_SUB_I32_e32
 | 
			
		||||
; SI: V_MAX_I32_e32
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
@@ -33,7 +33,7 @@ define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @abs_i32_legacy_amdil
 | 
			
		||||
; FUNC-LABEL: {{^}}abs_i32_legacy_amdil:
 | 
			
		||||
; SI: V_SUB_I32_e32
 | 
			
		||||
; SI: V_MAX_I32_e32
 | 
			
		||||
; SI: S_ENDPGM
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_barrier_global
 | 
			
		||||
; FUNC-LABEL: {{^}}test_barrier_global:
 | 
			
		||||
; EG: GROUP_BARRIER
 | 
			
		||||
; SI: S_BARRIER
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
			
		||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 | 
			
		||||
 | 
			
		||||
; FUNC-LABEL: @test_barrier_local
 | 
			
		||||
; FUNC-LABEL: {{^}}test_barrier_local:
 | 
			
		||||
; EG: GROUP_BARRIER
 | 
			
		||||
; SI: S_BARRIER
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user