R600/SI: Use same complex patterns for DS atomics

This fixes hitting the same negative base offset problem
that was already fixed for regular loads and stores.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217256 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2014-09-05 16:24:58 +00:00
parent 353cf20b9b
commit 89a7e3ec3e
3 changed files with 86 additions and 64 deletions

View File

@ -2572,17 +2572,10 @@ def : Pat <
(EXTRACT_SUBREG $value, sub1), $offset0, $offset1)
>;
multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
def : Pat <
(frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value),
(inst (i1 0), $ptr, $value, (as_i16imm $offset))
>;
def : Pat <
(frag i32:$ptr, vt:$val),
(inst 0, $ptr, $val, 0)
>;
}
// Selection pattern for a returning DS atomic that takes a single data
// operand ($value of type vt).
//
// The address operand of `frag` is matched through the DS1Addr1Offset
// complex pattern, which produces a base pointer ($ptr) and an offset
// ($offset); the offset is emitted as an immediate through as_i16imm.
// Routing the address through the complex pattern (instead of matching
// `add ptr, imm` directly) is what lets these atomics share the negative
// base offset handling already used for regular DS loads and stores.
//
// NOTE(review): the leading (i1 0) is the first operand of `inst` —
// presumably the GDS bit; confirm against the DS instruction definition.
class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
(inst (i1 0), $ptr, $value, (as_i16imm $offset))
>;
// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
//
@ -2594,69 +2587,56 @@ multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
// needs to be a VGPR. The SGPR copy pass will fix this, and it's
// easier since there is no v_mov_b64.
multiclass DSAtomicIncRetPat<DS inst, ValueType vt,
Instruction LoadImm, PatFrag frag> {
def : Pat <
(frag (add i32:$ptr, (i32 IMM16bit:$offset)), (vt 1)),
(inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
>;
// Selection pattern for `frag` applied with the constant operand 1, selected
// to `inst` (an inc / dec style DS atomic) instead of the generic add / sub.
//
// The address is matched through the DS1Addr1Offset complex pattern, giving
// a base pointer ($ptr) and an offset ($offset) that is emitted through
// as_i16imm. The data operand handed to `inst` is the constant -1,
// materialized with the LoadImm instruction supplied by the instantiation
// (S_MOV_B32 / S_MOV_B64 in the uses visible in this file).
//
// NOTE(review): the leading (i1 0) is the first operand of `inst` —
// presumably the GDS bit; confirm against the DS instruction definition.
class DSAtomicIncRetPat<DS inst, ValueType vt,
Instruction LoadImm, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
(inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
>;
def : Pat <
(frag i32:$ptr, (vt 1)),
(inst 0, $ptr, (LoadImm (vt -1)), 0)
>;
}
multiclass DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> {
def : Pat <
(frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$cmp, vt:$swap),
(inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
>;
def : Pat <
(frag i32:$ptr, vt:$cmp, vt:$swap),
(inst 0, $ptr, $cmp, $swap, 0)
>;
}
// Selection pattern for a returning DS compare-and-swap with two data
// operands of type vt: the comparison value ($cmp) and the replacement
// value ($swap), passed to `inst` in that order.
//
// The address is matched through the DS1Addr1Offset complex pattern, giving
// a base pointer ($ptr) and an offset ($offset) that is emitted through
// as_i16imm, so cmpxchg uses the same base/offset splitting as the other
// DS atomic patterns.
//
// NOTE(review): the leading (i1 0) is the first operand of `inst` —
// presumably the GDS bit; confirm against the DS instruction definition.
class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
(inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
>;
// 32-bit atomics.
defm : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
S_MOV_B32, atomic_load_add_local>;
defm : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
S_MOV_B32, atomic_load_sub_local>;
def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
S_MOV_B32, atomic_load_add_local>;
def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
S_MOV_B32, atomic_load_sub_local>;
defm : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
defm : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
defm : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
defm : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
defm : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
defm : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
defm : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
defm : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
defm : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
defm : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
def : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
def : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
defm : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
// 64-bit atomics.
defm : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
S_MOV_B64, atomic_load_add_local>;
defm : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
S_MOV_B64, atomic_load_sub_local>;
def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
S_MOV_B64, atomic_load_add_local>;
def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
S_MOV_B64, atomic_load_sub_local>;
defm : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
defm : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
defm : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
defm : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
defm : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
defm : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
defm : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
defm : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
defm : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
defm : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
def : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
def : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
defm : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
//===----------------------------------------------------------------------===//

View File

@ -1,4 +1,5 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_offset:
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
@ -35,3 +36,17 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_bad_si_offset
; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
; Regression test for offset folding on a cmpxchg in addrspace(3).
; The address is %ptr + 4 * ((%a - %b) + 4): a constant displacement of
; 16 bytes on top of a base whose index comes from a subtraction and may
; therefore be negative. The check lines for this function expect offset
; 0x0 on the SI run and offset 0x10 on the bonaire (CI) run.
define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
; Index with a possibly negative variable part plus a constant 4 elements.
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
%gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
; Compare against the constant 7 and swap in %swap.
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
; Store the loaded (old) value so the returning form of the atomic is used.
%result = extractvalue { i32, i1 } %pair, 0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}

View File

@ -1,4 +1,5 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; FUNC-LABEL: @lds_atomic_xchg_ret_i32:
; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
@ -47,6 +48,19 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
ret void
}
; FUNC-LABEL: @lds_atomic_add_ret_i32_bad_si_offset
; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
; CI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
; Regression test for offset folding on an atomic add in addrspace(3).
; The address is %ptr + 4 * ((%a - %b) + 4): a constant displacement of
; 16 bytes on top of a base whose index comes from a subtraction and may
; therefore be negative. The check lines for this function expect offset
; 0x0 on the SI run and offset 0x10 on the bonaire (CI) run.
define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
; Index with a possibly negative variable part plus a constant 4 elements.
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
%gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
; Add the constant 4; the old value is stored so the returning form is used.
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: @lds_atomic_inc_ret_i32:
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
@ -70,6 +84,19 @@ define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
ret void
}
; FUNC-LABEL: @lds_atomic_inc_ret_i32_bad_si_offset:
; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
; CI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
; SI: S_ENDPGM
; Regression test for offset folding on an atomic add of 1 in addrspace(3),
; which the check lines for this function expect to be selected as
; DS_INC_RTN_U32. The address is %ptr + 4 * ((%a - %b) + 4): a constant
; displacement of 16 bytes on top of a base whose index comes from a
; subtraction and may therefore be negative. The expected offset is 0x0 on
; the SI run and 0x10 on the bonaire (CI) run.
define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
; Index with a possibly negative variable part plus a constant 4 elements.
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
%gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
; Add the constant 1; the old value is stored so the returning form is used.
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: @lds_atomic_sub_ret_i32:
; SI: DS_SUB_RTN_U32
; SI: S_ENDPGM