diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 8973f2898c9..173f9bbd21c 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -99,10 +99,18 @@ def as_i32imm: SDNodeXFormgetTargetConstant(N->getSExtValue(), MVT::i32); }]>; +def IMM8bit : PatLeaf <(imm), + [{return isUInt<8>(N->getZExtValue());}] +>; + def IMM12bit : PatLeaf <(imm), [{return isUInt<12>(N->getZExtValue());}] >; +def IMM16bit : PatLeaf <(imm), + [{return isUInt<16>(N->getZExtValue());}] +>; + def mubuf_vaddr_offset : PatFrag< (ops node:$ptr, node:$offset, node:$imm_offset), (add (add node:$ptr, node:$offset), node:$imm_offset) @@ -387,6 +395,7 @@ class DS_1A op, dag outs, dag ins, string asm, list pat> : DS { bits<16> offset; + // Single loads interpret the 2 i8imm operands as a single i16 offset. let offset0 = offset{7-0}; let offset1 = offset{15-8}; } diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index a5b9c033ea2..3b55d427edb 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1915,29 +1915,39 @@ def : Pat < /********** Load/Store Patterns **********/ /********** ======================= **********/ -class DSReadPat : Pat < - (frag i32:$src0), - (vt (inst 0, $src0, 0)) ->; +multiclass DSReadPat { + def : Pat < + (vt (frag (add i32:$ptr, (i32 IMM16bit:$offset)))), + (inst (i1 0), $ptr, (as_i16imm $offset)) + >; -def : DSReadPat ; -def : DSReadPat ; -def : DSReadPat ; -def : DSReadPat ; -def : DSReadPat ; -def : Pat < - (local_load i32:$src0), - (i32 (DS_READ_B32 0, $src0, 0)) ->; + def : Pat < + (frag i32:$src0), + (vt (inst 0, $src0, 0)) + >; +} -class DSWritePat : Pat < - (frag i32:$src1, i32:$src0), - (inst 0, $src0, $src1, 0) ->; +defm : DSReadPat ; +defm : DSReadPat ; +defm : DSReadPat ; +defm : DSReadPat ; +defm : DSReadPat ; -def : DSWritePat ; -def : DSWritePat ; -def : DSWritePat ; +multiclass DSWritePat { + def : Pat < + (frag vt:$value, (add i32:$ptr, 
(i32 IMM16bit:$offset))), + (inst (i1 0), $ptr, $value, (as_i16imm $offset)) + >; + + def : Pat < + (frag i32:$src1, i32:$src0), + (inst 0, $src0, $src1, 0) + >; +} + +defm : DSWritePat ; +defm : DSWritePat ; +defm : DSWritePat ; def : Pat <(atomic_load_add_local i32:$ptr, i32:$val), (DS_ADD_U32_RTN 0, $ptr, $val, 0)>; diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll index 8c31e01f537..fffaefe0983 100644 --- a/test/CodeGen/R600/32-bit-local-address-space.ll +++ b/test/CodeGen/R600/32-bit-local-address-space.ll @@ -11,7 +11,7 @@ ; CHECK-LABEL: @local_address_load ; CHECK: V_MOV_B32_e{{32|64}} [[PTR:v[0-9]]] -; CHECK: DS_READ_B32 [[PTR]] +; CHECK: DS_READ_B32 v{{[0-9]+}}, [[PTR]] define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { entry: %0 = load i32 addrspace(3)* %in @@ -32,9 +32,8 @@ entry: } ; CHECK-LABEL: @local_address_gep_const_offset -; CHECK: S_ADD_I32 [[SPTR:s[0-9]]] -; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]] -; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]] +; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}} +; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 4, define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { entry: %0 = getelementptr i32 addrspace(3)* %in, i32 1 @@ -43,6 +42,19 @@ entry: ret void } +; Offset too large, can't fold into 16-bit immediate offset. 
+; CHECK-LABEL: @local_address_gep_large_const_offset +; CHECK: S_ADD_I32 [[SPTR:s[0-9]+]], s{{[0-9]+}}, 65540 +; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]] +define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { +entry: + %0 = getelementptr i32 addrspace(3)* %in, i32 16385 + %1 = load i32 addrspace(3)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + ; CHECK-LABEL: @null_32bit_lds_ptr: ; CHECK: V_CMP_NE_I32 ; CHECK-NOT: V_CMP_NE_I32 @@ -86,3 +98,41 @@ define void @global_ptr() nounwind { store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr ret void } + +; CHECK-LABEL: @local_address_store +; CHECK: DS_WRITE_B32 +define void @local_address_store(i32 addrspace(3)* %out, i32 %val) { + store i32 %val, i32 addrspace(3)* %out + ret void +} + +; CHECK-LABEL: @local_address_gep_store +; CHECK: S_ADD_I32 [[SADDR:s[0-9]+]], +; CHECK: V_MOV_B32_e32 [[ADDR:v[0-9]+]], [[SADDR]] +; CHECK: DS_WRITE_B32 [[ADDR]], v{{[0-9]+}}, +define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 %offset) { + %gep = getelementptr i32 addrspace(3)* %out, i32 %offset + store i32 %val, i32 addrspace(3)* %gep, align 4 + ret void +} + +; CHECK-LABEL: @local_address_gep_const_offset_store +; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}} +; CHECK: V_MOV_B32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}} +; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 4 +define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) { + %gep = getelementptr i32 addrspace(3)* %out, i32 1 + store i32 %val, i32 addrspace(3)* %gep, align 4 + ret void +} + +; Offset too large, can't fold into 16-bit immediate offset. 
+; CHECK-LABEL: @local_address_gep_large_const_offset_store +; CHECK: S_ADD_I32 [[SPTR:s[0-9]+]], s{{[0-9]+}}, 65540 +; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; CHECK: DS_WRITE_B32 [[VPTR]], v{{[0-9]+}}, 0 +define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) { + %gep = getelementptr i32 addrspace(3)* %out, i32 16385 + store i32 %val, i32 addrspace(3)* %gep, align 4 + ret void +} diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll index 1fc616a4ed4..15d2ed23818 100644 --- a/test/CodeGen/R600/address-space.ll +++ b/test/CodeGen/R600/address-space.ll @@ -4,11 +4,14 @@ %struct.foo = type { [3 x float], [3 x float] } +; FIXME: Extra V_MOV from SGPR to VGPR for second read. The address is +; already in a VGPR after the first read. + ; CHECK-LABEL: @do_as_ptr_calcs: -; CHECK: S_ADD_I32 {{s[0-9]+}}, -; CHECK: S_ADD_I32 [[SREG1:s[0-9]+]], +; CHECK: S_LOAD_DWORD [[SREG1:s[0-9]+]], ; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]] -; CHECK: DS_READ_B32 [[VREG1]], +; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VREG1]], 20 +; CHECK: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 12 define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind { entry: %x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0 diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll index 494b815674c..ee914fafe91 100644 --- a/test/CodeGen/R600/gep-address-space.ll +++ b/test/CodeGen/R600/gep-address-space.ll @@ -2,12 +2,22 @@ define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { ; CHECK-LABEL: @use_gep_address_space: -; CHECK: S_ADD_I32 +; CHECK: V_MOV_B32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}} +; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}}, 64 %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16 store i32 99, i32 addrspace(3)* %p ret void } +define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) 
nounwind { +; CHECK-LABEL: @use_gep_address_space_large_offset: +; CHECK: S_ADD_I32 +; CHECK: DS_WRITE_B32 + %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16384 + store i32 99, i32 addrspace(3)* %p + ret void +} + define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind { ; CHECK-LABEL: @gep_as_vector_v4: ; CHECK: S_ADD_I32 diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll index fdb2a5d292d..616000d45da 100644 --- a/test/CodeGen/R600/local-memory-two-objects.ll +++ b/test/CodeGen/R600/local-memory-two-objects.ll @@ -24,11 +24,12 @@ ; EG-CHECK: GROUP_BARRIER ; EG-CHECK-NEXT: ALU clause -; Make sure the lds reads are using different addresses. +; Make sure the lds reads are using different addresses, at different +; constant offsets. ; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]] ; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]] -; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] -; SI-CHECK-NOT: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]] +; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 16 +; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0, define void @local_memory_two_objects(i32 addrspace(1)* %out) { entry: