From b97095b94f1297256e2df0d49355f2f4f10ffcfe Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 11 Jun 2014 18:08:34 +0000
Subject: [PATCH] R600/SI: Refactor local atomics.

Use patterns that will also match the immediate offset to match the
normal read / writes.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210673 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDGPUInstructions.td | 24 ++++++++++----
 lib/Target/R600/SIInstructions.td     | 17 +++++---
 test/CodeGen/R600/atomic_load_add.ll  | 47 ++++++++++++++++++---------
 test/CodeGen/R600/atomic_load_sub.ll  | 47 ++++++++++++++++++---------
 4 files changed, 92 insertions(+), 43 deletions(-)

diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 1345f79646d..1faf9ea06a5 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -232,15 +232,25 @@ def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
   return isLocalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 
-def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value),
-    (atomic_load_add node:$ptr, node:$value), [{
-  return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+
+class local_binary_atomic_op<SDNode atomic_op> :
+  PatFrag<(ops node:$ptr, node:$value),
+    (atomic_op node:$ptr, node:$value), [{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }]>;
 
-def atomic_load_sub_local : PatFrag<(ops node:$ptr, node:$value),
-    (atomic_load_sub node:$ptr, node:$value), [{
-  return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
+
+def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
+def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
+def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
+def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
+def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
+def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
+def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
+def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
+def
atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
+def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
+def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
 
 def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                             (AMDGPUstore_mskor node:$val, node:$ptr), [{
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 9af1a706175..70462a1e217 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -2168,11 +2168,20 @@ defm : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
 defm : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
 defm : DSWritePat <DS_WRITE_B32, i32, local_store>;
 
-def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
-           (DS_ADD_U32_RTN 0, $ptr, $val, 0)>;
+multiclass DSAtomicPat<DS inst, ValueType vt, PatFrag frag> {
+  def : Pat <
+    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value),
+    (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+  >;
 
-def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val),
-           (DS_SUB_U32_RTN 0, $ptr, $val, 0)>;
+  def : Pat <
+    (frag i32:$ptr, vt:$val),
+    (inst 0, $ptr, $val, 0)
+  >;
+}
+
+defm : DSAtomicPat<DS_ADD_U32_RTN, i32, atomic_load_add_local>;
+defm : DSAtomicPat<DS_SUB_U32_RTN, i32, atomic_load_sub_local>;
 
 //===----------------------------------------------------------------------===//
 // MUBUF Patterns
diff --git a/test/CodeGen/R600/atomic_load_add.ll b/test/CodeGen/R600/atomic_load_add.ll
index cb0242cd0c9..877602c50cc 100644
--- a/test/CodeGen/R600/atomic_load_add.ll
+++ b/test/CodeGen/R600/atomic_load_add.ll
@@ -1,23 +1,38 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 
-; R600-CHECK-LABEL: @atomic_add_local
-; R600-CHECK: LDS_ADD *
-; SI-CHECK-LABEL: @atomic_add_local
-; SI-CHECK: DS_ADD_U32_RTN
+; FUNC-LABEL: @atomic_add_local
+; R600: LDS_ADD *
+; SI: DS_ADD_U32_RTN
 define void @atomic_add_local(i32 addrspace(3)* %local) {
-entry:
-  %0
= atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
+  %unused = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
   ret void
 }
 
-; R600-CHECK-LABEL: @atomic_add_ret_local
-; R600-CHECK: LDS_ADD_RET *
-; SI-CHECK-LABEL: @atomic_add_ret_local
-; SI-CHECK: DS_ADD_U32_RTN
-define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
-entry:
-  %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
-  store i32 %0, i32 addrspace(1)* %out
+; FUNC-LABEL: @atomic_add_local_const_offset
+; R600: LDS_ADD *
+; SI: DS_ADD_U32_RTN v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
+  %gep = getelementptr i32 addrspace(3)* %local, i32 4
+  %val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: @atomic_add_ret_local
+; R600: LDS_ADD_RET *
+; SI: DS_ADD_U32_RTN
+define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
+  %val = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @atomic_add_ret_local_const_offset
+; R600: LDS_ADD_RET *
+; SI: DS_ADD_U32_RTN v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
+define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
+  %gep = getelementptr i32 addrspace(3)* %local, i32 5
+  %val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
+  store i32 %val, i32 addrspace(1)* %out
   ret void
 }
diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll
index 7c26e529a36..b2e10a9b3d7 100644
--- a/test/CodeGen/R600/atomic_load_sub.ll
+++ b/test/CodeGen/R600/atomic_load_sub.ll
@@ -1,23 +1,38 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=r600
-mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; R600-CHECK-LABEL: @atomic_sub_local
-; R600-CHECK: LDS_SUB *
-; SI-CHECK-LABEL: @atomic_sub_local
-; SI-CHECK: DS_SUB_U32_RTN
+; FUNC-LABEL: @atomic_sub_local
+; R600: LDS_SUB *
+; SI: DS_SUB_U32_RTN
 define void @atomic_sub_local(i32 addrspace(3)* %local) {
-entry:
-  %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
+  %unused = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
   ret void
 }
 
-; R600-CHECK-LABEL: @atomic_sub_ret_local
-; R600-CHECK: LDS_SUB_RET *
-; SI-CHECK-LABEL: @atomic_sub_ret_local
-; SI-CHECK: DS_SUB_U32_RTN
-define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
-entry:
-  %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
-  store i32 %0, i32 addrspace(1)* %out
+; FUNC-LABEL: @atomic_sub_local_const_offset
+; R600: LDS_SUB *
+; SI: DS_SUB_U32_RTN v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
+  %gep = getelementptr i32 addrspace(3)* %local, i32 4
+  %val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: @atomic_sub_ret_local
+; R600: LDS_SUB_RET *
+; SI: DS_SUB_U32_RTN
+define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
+  %val = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @atomic_sub_ret_local_const_offset
+; R600: LDS_SUB_RET *
+; SI: DS_SUB_U32_RTN v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
+define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
+  %gep = getelementptr i32 addrspace(3)* %local, i32 5
+  %val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
+  store i32 %val, i32 addrspace(1)* %out
   ret void
 }