R600/SI: select atomic_load_sub on the local address space.

Summary of the hunks below:
  * AMDGPUInstructions.td: new PatFrag atomic_load_sub_local, matching
    atomic_load_sub nodes whose address space is AMDGPUAS::LOCAL_ADDRESS.
  * R600Instructions.td: new LDS_SUB (opcode 0x1, no pattern) and
    LDS_SUB_RET (opcode 0x21, selected from atomic_load_sub_local).
  * SIInstructions.td: new DS_SUB_U32_RTN (opcode 0x21) plus the Pat that
    selects it from atomic_load_sub_local.
  * test/CodeGen/R600/atomic_load_sub.ll: FileCheck test covering both the
    unused-result and used-result forms on redwood and SI.

diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 6745fed3bae..e30abc08bdb 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -196,6 +196,11 @@ def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value),
   return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }]>;
 
+def atomic_load_sub_local : PatFrag<(ops node:$ptr, node:$value),
+                                    (atomic_load_sub node:$ptr, node:$value), [{
+  return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
 def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                             (AMDGPUstore_mskor node:$val, node:$ptr), [{
   return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index efa475198ca..24bc6b0241c 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1673,6 +1673,7 @@ class R600_LDS_1A2D lds_op, string name, list pattern> :
 }
 
 def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
+def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >;
 def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
   [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
 >;
@@ -1685,6 +1686,9 @@ def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE",
 def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
   [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))]
 >;
+def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
+  [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
+>;
 def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
   [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
 >;
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 31a5ad237e7..14a189a07af 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -392,6 +392,7 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
 } // End isCompare = 1
 
 def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>;
+def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>;
 def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
 def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
 def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
@@ -1779,6 +1780,9 @@ def : DSWritePat ;
 def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
            (DS_ADD_U32_RTN 0, $ptr, $val, 0, 0)>;
 
+def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val),
+           (DS_SUB_U32_RTN 0, $ptr, $val, 0, 0)>;
+
 /********** ================== **********/
 /********** SMRD Patterns **********/
 /********** ================== **********/
diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll
new file mode 100644
index 00000000000..e256f07c723
--- /dev/null
+++ b/test/CodeGen/R600/atomic_load_sub.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
+
+; R600-CHECK-LABEL: @atomic_sub_local
+; R600-CHECK: LDS_SUB *
+; SI-CHECK-LABEL: @atomic_sub_local
+; SI-CHECK: DS_SUB_U32_RTN 0
+define void @atomic_sub_local(i32 addrspace(3)* %local) {
+entry:
+  %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
+  ret void
+}
+
+; R600-CHECK-LABEL: @atomic_sub_ret_local
+; R600-CHECK: LDS_SUB_RET *
+; SI-CHECK-LABEL: @atomic_sub_ret_local
+; SI-CHECK: DS_SUB_U32_RTN 0
+define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
+entry:
+  %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}