R600/SI: Add support for global atomic add

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218457 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2014-09-25 18:30:26 +00:00
parent 79826e015e
commit 29d48e6a49
5 changed files with 150 additions and 3 deletions

View File

@ -96,11 +96,16 @@ private:
SDValue &TFE) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &Offset) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &Offset,
SDValue &SLC) const;
bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
SDValue &SOffset, SDValue &ImmOffset) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
SDValue &Offset, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset, SDValue &GLC) const;
SDNode *SelectAddrSpaceCast(SDNode *N);
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
@ -891,6 +896,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
return false;
}
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &Offset,
SDValue &SLC) const {
SLC = CurDAG->getTargetConstant(0, MVT::i1);
return SelectMUBUFAddr64(Addr, SRsrc, VAddr, Offset);
}
static SDValue buildRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr,
uint32_t RsrcDword1, uint64_t RsrcDword2And3) {
@ -1001,6 +1014,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return false;
}
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset,
SDValue &GLC) const {
SDValue SLC, TFE;
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
// FIXME: This is incorrect and only enough to be able to compile.
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);

View File

@ -380,6 +380,14 @@ def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
}]>;
class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
>;
def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

View File

@ -196,8 +196,10 @@ def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
def MUBUFAddr64Atomic : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
@ -929,9 +931,10 @@ class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp =
let mayLoad = 1;
}
class MUBUFAddr64Table <bit is_addr64> {
class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
bit IsAddr64 = is_addr64;
string OpName = NAME # suffix;
}
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
@ -947,6 +950,80 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
let mayLoad = 0;
}
class MUBUFAtomicAddr64 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
: MUBUF <op, outs, ins, asm, pattern> {
let offen = 0;
let idxen = 0;
let addr64 = 1;
let tfe = 0;
let lds = 0;
let soffset = 128;
}
class MUBUFAtomicOffset <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
: MUBUF <op, outs, ins, asm, pattern> {
let offen = 0;
let idxen = 0;
let addr64 = 0;
let tfe = 0;
let lds = 0;
let vaddr = 0;
}
multiclass MUBUF_Atomic <bits<7> op, string name, RegisterClass rc,
ValueType vt, SDPatternOperator atomic> {
let mayStore = 1, mayLoad = 1, hasPostISelHook = 1 in {
// No return variants
let glc = 0 in {
def _ADDR64 : MUBUFAtomicAddr64 <
op, (outs),
(ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
mbuf_offset:$offset, slc:$slc),
name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#"$slc", []
>, MUBUFAddr64Table<1>, AtomicNoRet<NAME#"_ADDR64", 0>;
def _OFFSET : MUBUFAtomicOffset <
op, (outs),
(ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
SSrc_32:$soffset, slc:$slc),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", []
>, MUBUFAddr64Table<0>, AtomicNoRet<NAME#"_OFFSET", 0>;
} // glc = 0
// Variant that return values
let glc = 1, Constraints = "$vdata = $vdata_in",
DisableEncoding = "$vdata_in" in {
def _RTN_ADDR64 : MUBUFAtomicAddr64 <
op, (outs rc:$vdata),
(ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
mbuf_offset:$offset, slc:$slc),
name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#" glc"#"$slc",
[(set vt:$vdata,
(atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i16:$offset,
i1:$slc), vt:$vdata_in))]
>, MUBUFAddr64Table<1, "_RTN">, AtomicNoRet<NAME#"_ADDR64", 1>;
def _RTN_OFFSET : MUBUFAtomicOffset <
op, (outs rc:$vdata),
(ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset,
SSrc_32:$soffset, slc:$slc),
name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
[(set vt:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
i1:$slc), vt:$vdata_in))]
>, MUBUFAddr64Table<0, "_RTN">, AtomicNoRet<NAME#"_OFFSET", 1>;
} // glc = 1
} // mayStore = 1, mayLoad = 1, hasPostISelHook = 1
}
multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
ValueType load_vt = i32,
SDPatternOperator ld = null_frag> {
@ -1292,7 +1369,7 @@ def getMCOpcode : InstrMapping {
def getAddr64Inst : InstrMapping {
let FilterClass = "MUBUFAddr64Table";
let RowFields = ["NAME"];
let RowFields = ["OpName"];
let ColFields = ["IsAddr64"];
let KeyCol = ["0"];
let ValueCols = [["1"]];

View File

@ -891,7 +891,9 @@ defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
>;
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
0x00000032, "BUFFER_ATOMIC_ADD", VReg_32, i32, atomic_add_global
>;
//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;

View File

@ -0,0 +1,39 @@
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; FUNC-LABEL: @atomic_load_i32_offset
; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
define void @atomic_load_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
ret void
}
; FUNC-LABEL: @atomic_load_i32_ret_offset
; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SI: BUFFER_STORE_DWORD [[RET]]
define void @atomic_load_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
%0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
}
; FUNC-LABEL: @atomic_load_i32_addr64
; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_load_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
; FUNC-LABEL: @atomic_load_i32_ret_addr64
; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; SI: BUFFER_STORE_DWORD [[RET]]
define void @atomic_load_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
%ptr = getelementptr i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
}