diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index bad5de4c473..bfd514766ac 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -494,32 +494,6 @@ class DS_1A1D_RET op, string asm, RegisterClass rc> : DS_1A < let mayLoad = 1; } -// 1 address, 0 data. -class DS_1A0D_RET op, string asm, RegisterClass rc> : DS_1A < - op, - (outs rc:$vdst), - (ins i1imm:$gds, VReg_32:$addr, u16imm:$offset), - asm#" $vdst, $addr, $offset, [M0]", - []> { - let data0 = 0; - let data1 = 0; - let mayStore = 1; - let mayLoad = 1; -} - -// 1 address, 0 data. -class DS_1A0D_NORET op, string asm, RegisterClass rc> : DS_1A < - op, - (outs ), - (ins i1imm:$gds, VReg_32:$addr, u16imm:$offset), - asm#" $addr, $offset, [M0]", - []> { - let data0 = 0; - let data1 = 0; - let mayStore = 1; - let mayLoad = 1; -} - // 1 address, 2 data. class DS_1A2D_RET op, string asm, RegisterClass rc> : DS_1A < op, diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 0283e06d570..acdbc9b0342 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -717,8 +717,8 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; def DS_ADD_U32 : DS_1A1D_NORET <0x0, "DS_ADD_U32", VReg_32>; def DS_SUB_U32 : DS_1A1D_NORET <0x1, "DS_SUB_U32", VReg_32>; def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "DS_RSUB_U32", VReg_32>; -def DS_INC_U32 : DS_1A0D_NORET <0x3, "DS_INC_U32", VReg_32>; -def DS_DEC_U32 : DS_1A0D_NORET <0x4, "DS_DEC_U32", VReg_32>; +def DS_INC_U32 : DS_1A1D_NORET <0x3, "DS_INC_U32", VReg_32>; +def DS_DEC_U32 : DS_1A1D_NORET <0x4, "DS_DEC_U32", VReg_32>; def DS_MIN_I32 : DS_1A1D_NORET <0x5, "DS_MIN_I32", VReg_32>; def DS_MAX_I32 : DS_1A1D_NORET <0x6, "DS_MAX_I32", VReg_32>; def DS_MIN_U32 : DS_1A1D_NORET <0x7, "DS_MIN_U32", VReg_32>; @@ -735,8 +735,8 @@ def DS_MAX_F32 : DS_1A1D_NORET <0x13, "DS_MAX_F32", VReg_32>; def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "DS_ADD_RTN_U32", VReg_32>; def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "DS_SUB_RTN_U32", VReg_32>; def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "DS_RSUB_RTN_U32", VReg_32>; -def DS_INC_RTN_U32 : DS_1A0D_RET <0x23, "DS_INC_RTN_U32", VReg_32>; -def DS_DEC_RTN_U32 : DS_1A0D_RET <0x24, "DS_DEC_RTN_U32", VReg_32>; +def DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "DS_INC_RTN_U32", VReg_32>; +def DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "DS_DEC_RTN_U32", VReg_32>; def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "DS_MIN_RTN_I32", VReg_32>; def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "DS_MAX_RTN_I32", VReg_32>; def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "DS_MIN_RTN_U32", VReg_32>; @@ -761,8 +761,8 @@ def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "DS_WRAP_RTN_F32", VReg_32>; def DS_ADD_U64 : DS_1A1D_NORET <0x40, "DS_ADD_U64", VReg_32>; def DS_SUB_U64 : DS_1A1D_NORET <0x41, "DS_SUB_U64", VReg_32>; def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "DS_RSUB_U64", VReg_32>; -def DS_INC_U64 : DS_1A0D_NORET <0x43, "DS_INC_U64", VReg_32>; -def DS_DEC_U64 : DS_1A0D_NORET <0x44, "DS_DEC_U64", VReg_32>; +def DS_INC_U64 : DS_1A1D_NORET <0x43, "DS_INC_U64", VReg_32>; +def DS_DEC_U64 : DS_1A1D_NORET <0x44, "DS_DEC_U64", VReg_32>; def DS_MIN_I64 : DS_1A1D_NORET <0x45, "DS_MIN_I64", VReg_64>; def DS_MAX_I64 : DS_1A1D_NORET <0x46, "DS_MAX_I64", VReg_64>; def DS_MIN_U64 : DS_1A1D_NORET <0x47, "DS_MIN_U64", VReg_64>; @@ -779,8 +779,8 @@ def DS_MAX_F64 : DS_1A1D_NORET <0x53, "DS_MAX_F64", VReg_64>; def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "DS_ADD_RTN_U64", VReg_64>; def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "DS_SUB_RTN_U64", VReg_64>; def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "DS_RSUB_RTN_U64", VReg_64>; -def DS_INC_RTN_U64 : DS_1A0D_RET <0x63, "DS_INC_RTN_U64", VReg_64>; -def DS_DEC_RTN_U64 : DS_1A0D_RET <0x64, "DS_DEC_RTN_U64", VReg_64>; +def DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "DS_INC_RTN_U64", VReg_64>; +def DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "DS_DEC_RTN_U64", VReg_64>; def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "DS_MIN_RTN_I64", VReg_64>; def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "DS_MAX_RTN_I64", VReg_64>; def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "DS_MIN_RTN_U64", VReg_64>; @@ -2272,15 +2272,25 @@ multiclass DSAtomicRetPat { } // Special case of DSAtomicRetPat for add / sub 1 -> inc / dec -multiclass DSAtomicIncRetPat { +// +// We need to use something for the data0, so we set a register to +// -1. For the non-rtn variants, the manual says it does +// DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, and setting D0 to uint_max +// will always do the increment so I'm assuming it's the same. +// +// We also load this -1 with s_mov_b32 / s_mov_b64 even though this +// needs to be a VGPR. The SGPR copy pass will fix this, and it's +// easier since there is no v_mov_b64. +multiclass DSAtomicIncRetPat { def : Pat < (frag (add i32:$ptr, (i32 IMM16bit:$offset)), (vt 1)), - (inst (i1 0), $ptr, (as_i16imm $offset)) + (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset)) >; def : Pat < (frag i32:$ptr, (vt 1)), - (inst 0, $ptr, 0) + (inst 0, $ptr, (LoadImm (vt -1)), 0) >; } @@ -2298,8 +2308,10 @@ multiclass DSAtomicCmpXChg { // 32-bit atomics. -defm : DSAtomicIncRetPat; -defm : DSAtomicIncRetPat; +defm : DSAtomicIncRetPat; +defm : DSAtomicIncRetPat; defm : DSAtomicRetPat; defm : DSAtomicRetPat; @@ -2315,8 +2327,10 @@ defm : DSAtomicRetPat; defm : DSAtomicCmpXChg; // 64-bit atomics. -defm : DSAtomicIncRetPat; -defm : DSAtomicIncRetPat; +defm : DSAtomicIncRetPat; +defm : DSAtomicIncRetPat; defm : DSAtomicRetPat; defm : DSAtomicRetPat; diff --git a/test/CodeGen/R600/local-atomics.ll b/test/CodeGen/R600/local-atomics.ll index fb70ddf0692..5a44951055e 100644 --- a/test/CodeGen/R600/local-atomics.ll +++ b/test/CodeGen/R600/local-atomics.ll @@ -48,7 +48,9 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace } ; FUNC-LABEL: @lds_atomic_inc_ret_i32: -; SI: DS_INC_RTN_U32 +; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1 +; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] +; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0 ; SI: S_ENDPGM define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst @@ -57,7 +59,9 @@ define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p } ; FUNC-LABEL: @lds_atomic_inc_ret_i32_offset: -; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10 +; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1 +; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] +; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10 ; SI: S_ENDPGM define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 @@ -86,7 +90,9 @@ define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace } ; FUNC-LABEL: @lds_atomic_dec_ret_i32: -; SI: DS_DEC_RTN_U32 +; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1 +; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] +; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0 ; SI: S_ENDPGM define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst @@ -95,7 +101,9 @@ define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p } ; FUNC-LABEL: @lds_atomic_dec_ret_i32_offset: -; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, 0x10 +; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1 +; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] +; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10 ; SI: S_ENDPGM define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 diff --git a/test/CodeGen/R600/local-atomics64.ll b/test/CodeGen/R600/local-atomics64.ll index 399cfe73c63..849b033d84a 100644 --- a/test/CodeGen/R600/local-atomics64.ll +++ b/test/CodeGen/R600/local-atomics64.ll @@ -45,7 +45,11 @@ define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: @lds_atomic_inc_ret_i64: -; SI: DS_INC_RTN_U64 +; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1 +; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]] +; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]] +; SI: DS_INC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}, +; SI: BUFFER_STORE_DWORDX2 [[RESULT]], ; SI: S_ENDPGM define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst @@ -83,7 +87,11 @@ define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: @lds_atomic_dec_ret_i64: -; SI: DS_DEC_RTN_U64 +; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1 +; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]] +; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]] +; SI: DS_DEC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}, +; SI: BUFFER_STORE_DWORDX2 [[RESULT]], ; SI: S_ENDPGM define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst