mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
R600/SI: Use same complex patterns for DS atomics
This fixes hitting the same negative base offset problem that was already fixed for regular loads and stores.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217256 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
353cf20b9b
commit
89a7e3ec3e
@ -2572,17 +2572,10 @@ def : Pat <
|
||||
(EXTRACT_SUBREG $value, sub1), $offset0, $offset1)
|
||||
>;
|
||||
|
||||
multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
|
||||
def : Pat <
|
||||
(frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value),
|
||||
(inst (i1 0), $ptr, $value, (as_i16imm $offset))
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(frag i32:$ptr, vt:$val),
|
||||
(inst 0, $ptr, $val, 0)
|
||||
>;
|
||||
}
|
||||
class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
|
||||
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
|
||||
(inst (i1 0), $ptr, $value, (as_i16imm $offset))
|
||||
>;
|
||||
|
||||
// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
|
||||
//
|
||||
@ -2594,69 +2587,56 @@ multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
|
||||
// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
|
||||
// needs to be a VGPR. The SGPR copy pass will fix this, and it's
|
||||
// easier since there is no v_mov_b64.
|
||||
multiclass DSAtomicIncRetPat<DS inst, ValueType vt,
|
||||
Instruction LoadImm, PatFrag frag> {
|
||||
def : Pat <
|
||||
(frag (add i32:$ptr, (i32 IMM16bit:$offset)), (vt 1)),
|
||||
(inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
|
||||
>;
|
||||
class DSAtomicIncRetPat<DS inst, ValueType vt,
|
||||
Instruction LoadImm, PatFrag frag> : Pat <
|
||||
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
|
||||
(inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(frag i32:$ptr, (vt 1)),
|
||||
(inst 0, $ptr, (LoadImm (vt -1)), 0)
|
||||
>;
|
||||
}
|
||||
|
||||
multiclass DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> {
|
||||
def : Pat <
|
||||
(frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$cmp, vt:$swap),
|
||||
(inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(frag i32:$ptr, vt:$cmp, vt:$swap),
|
||||
(inst 0, $ptr, $cmp, $swap, 0)
|
||||
>;
|
||||
}
|
||||
class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
|
||||
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
|
||||
(inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
|
||||
>;
|
||||
|
||||
|
||||
// 32-bit atomics.
|
||||
defm : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
|
||||
S_MOV_B32, atomic_load_add_local>;
|
||||
defm : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
|
||||
S_MOV_B32, atomic_load_sub_local>;
|
||||
def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
|
||||
S_MOV_B32, atomic_load_add_local>;
|
||||
def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
|
||||
S_MOV_B32, atomic_load_sub_local>;
|
||||
|
||||
defm : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
|
||||
defm : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
|
||||
defm : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
|
||||
defm : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
|
||||
defm : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
|
||||
defm : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
|
||||
defm : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
|
||||
defm : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
|
||||
defm : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
|
||||
defm : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
|
||||
def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
|
||||
def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
|
||||
def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
|
||||
def : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
|
||||
def : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
|
||||
def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
|
||||
def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
|
||||
def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
|
||||
def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
|
||||
def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
|
||||
|
||||
defm : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
|
||||
def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
|
||||
|
||||
// 64-bit atomics.
|
||||
defm : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
|
||||
S_MOV_B64, atomic_load_add_local>;
|
||||
defm : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
|
||||
S_MOV_B64, atomic_load_sub_local>;
|
||||
def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
|
||||
S_MOV_B64, atomic_load_add_local>;
|
||||
def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
|
||||
S_MOV_B64, atomic_load_sub_local>;
|
||||
|
||||
defm : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
|
||||
defm : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
|
||||
defm : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
|
||||
defm : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
|
||||
defm : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
|
||||
defm : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
|
||||
defm : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
|
||||
defm : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
|
||||
defm : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
|
||||
defm : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
|
||||
def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
|
||||
def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
|
||||
def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
|
||||
def : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
|
||||
def : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
|
||||
def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
|
||||
def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
|
||||
def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
|
||||
def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
|
||||
def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
|
||||
|
||||
defm : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
|
||||
def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_offset:
|
||||
; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
@ -35,3 +36,17 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
|
||||
store i64 %result, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_bad_si_offset
|
||||
; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
|
||||
; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
|
||||
; SI: S_ENDPGM
|
||||
define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
|
||||
%sub = sub i32 %a, %b
|
||||
%add = add i32 %sub, 4
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
|
||||
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
|
||||
%result = extractvalue { i32, i1 } %pair, 0
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: @lds_atomic_xchg_ret_i32:
|
||||
; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
|
||||
@ -47,6 +48,19 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @lds_atomic_add_ret_i32_bad_si_offset
|
||||
; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
|
||||
; CI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
|
||||
; SI: S_ENDPGM
|
||||
define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
|
||||
%sub = sub i32 %a, %b
|
||||
%add = add i32 %sub, 4
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
|
||||
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @lds_atomic_inc_ret_i32:
|
||||
; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
|
||||
; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
|
||||
@ -70,6 +84,19 @@ define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @lds_atomic_inc_ret_i32_bad_si_offset:
|
||||
; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
|
||||
; CI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
|
||||
; SI: S_ENDPGM
|
||||
define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
|
||||
%sub = sub i32 %a, %b
|
||||
%add = add i32 %sub, 4
|
||||
%gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
|
||||
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @lds_atomic_sub_ret_i32:
|
||||
; SI: DS_SUB_RTN_U32
|
||||
; SI: S_ENDPGM
|
||||
|
Loading…
Reference in New Issue
Block a user