mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-07 01:38:26 +00:00
R600/SI: Use READ2/WRITE2 instructions for 64-bit mem ops with 32-bit alignment
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216279 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ec4cb3346d
commit
f50f927d65
@ -91,6 +91,8 @@ private:
|
||||
bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
|
||||
unsigned OffsetBits) const;
|
||||
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
|
||||
// Complex-pattern selector used for 64-bit DS accesses that are split into
// two dword accesses: fills in a base address and two i8 dword offsets
// (Offset1 is always Offset0 + 1 in the definition).
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
|
||||
SDValue &Offset1) const;
|
||||
void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
|
||||
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
|
||||
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
|
||||
@ -782,6 +784,31 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Select the operands of a 64-bit DS access that is emitted as two dword
/// accesses (used by the DS_READ2_B32 / DS_WRITE2_B32 patterns).
///
/// \param Addr     Address of the 64-bit access.
/// \param Base     [out] Base address operand.
/// \param Offset0  [out] i8 target constant: dword offset of the first half.
/// \param Offset1  [out] i8 target constant: dword offset of the second half
///                 (always Offset0 + 1).
/// \returns Always true. When no constant offset can be folded, the whole
///          address becomes the base and the offsets are 0 and 1.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    const uint64_t ByteOffset = C1->getZExtValue();

    // The offset operands count dwords. A byte offset that is not a multiple
    // of 4 cannot be expressed that way; dividing it would silently round the
    // address down, so such offsets fall through to the default case below.
    if (ByteOffset % 4 == 0) {
      unsigned DWordOffset0 = ByteOffset / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;
      // (add n0, c0)
      // Only the larger of the two offsets is checked against the 8-bit field.
      if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
        Base = N0;
        Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
        Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
        return true;
      }
    }
  }

  // default case
  // Keep any addition in the base and address the two consecutive dwords.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, MVT::i8);
  return true;
}
|
||||
|
||||
static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
|
||||
return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
|
||||
Ptr), 0);
|
||||
|
@ -282,6 +282,17 @@ def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
|
||||
// PatFrag wrapper that only matches when the memory node's alignment is a
// multiple of 8.
class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
|
||||
return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
|
||||
}]>;
|
||||
|
||||
// local_load restricted by the Aligned8Bytes predicate.
def local_load_aligned8bytes : Aligned8Bytes <
|
||||
(ops node:$ptr), (local_load node:$ptr)
|
||||
>;
|
||||
|
||||
// local_store restricted by the Aligned8Bytes predicate.
def local_store_aligned8bytes : Aligned8Bytes <
|
||||
(ops node:$val, node:$ptr), (local_store node:$val, node:$ptr)
|
||||
>;
|
||||
|
||||
class local_binary_atomic_op<SDNode atomic_op> :
|
||||
PatFrag<(ops node:$ptr, node:$value),
|
||||
|
@ -192,6 +192,7 @@ def tfe : Operand <i1> {
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
|
||||
def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
|
||||
|
||||
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
|
||||
def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
|
||||
|
@ -2530,7 +2530,18 @@ def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
|
||||
def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
|
||||
def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
|
||||
def : DSReadPat <DS_READ_B32, i32, local_load>;
|
||||
def : DSReadPat <DS_READ_B64, v2i32, local_load>;
|
||||
|
||||
// AddedComplexity raises this pattern's priority; presumably so that 8-byte
// aligned v2i32 local loads select DS_READ_B64 ahead of the DS_READ2_B32
// pattern, which matches any v2i32 local_load.
let AddedComplexity = 100 in {
|
||||
|
||||
def : DSReadPat <DS_READ_B64, v2i32, local_load_aligned8bytes>;
|
||||
|
||||
} // End AddedComplexity = 100
|
||||
|
||||
// v2i32 local load selected as DS_READ2_B32. The address is decomposed by
// the DS64Bit4ByteAligned complex pattern into a base pointer and two i8
// offsets.
def : Pat <
|
||||
(v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
|
||||
i8:$offset1))),
|
||||
(DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1)
|
||||
>;
|
||||
|
||||
class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
|
||||
(frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
|
||||
@ -2540,7 +2551,18 @@ class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
|
||||
def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
|
||||
def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
|
||||
def : DSWritePat <DS_WRITE_B32, i32, local_store>;
|
||||
def : DSWritePat <DS_WRITE_B64, v2i32, local_store>;
|
||||
|
||||
// AddedComplexity raises this pattern's priority; presumably so that 8-byte
// aligned v2i32 local stores select DS_WRITE_B64 ahead of the DS_WRITE2_B32
// pattern, which matches any v2i32 local_store.
let AddedComplexity = 100 in {
|
||||
|
||||
def : DSWritePat <DS_WRITE_B64, v2i32, local_store_aligned8bytes>;
|
||||
} // End AddedComplexity = 100
|
||||
|
||||
// v2i32 local store selected as DS_WRITE2_B32. The value is split into its
// sub0 and sub1 halves, and the address is decomposed by the
// DS64Bit4ByteAligned complex pattern into a base pointer and two i8 offsets.
def : Pat <
|
||||
(local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
|
||||
i8:$offset1)),
|
||||
(DS_WRITE2_B32 (i1 0), $ptr, (EXTRACT_SUBREG $value, sub0),
|
||||
(EXTRACT_SUBREG $value, sub1), $offset0, $offset1)
|
||||
>;
|
||||
|
||||
multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
|
||||
def : Pat <
|
||||
|
@ -32,9 +32,8 @@ define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> ad
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: This should use ds_read2_b32
|
||||
; SI-LABEL: @load_lds_i64_align_4
|
||||
; SI: DS_READ_B64
|
||||
; SI: DS_READ2_B32
|
||||
; SI: S_ENDPGM
|
||||
define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
|
||||
%val = load i64 addrspace(3)* %in, align 4
|
||||
@ -42,9 +41,61 @@ define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspac
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: @load_lds_i64_align_4_with_offset
|
||||
; SI: DS_READ2_B32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}}, 0x8, 0x9
|
||||
; SI: S_ENDPGM
|
||||
; Loads an i64 with 4-byte alignment from addrspace(3) at %in + 4 i64
; elements (32 bytes, i.e. dword offsets 8 and 9) and stores it to %out.
define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
|
||||
%ptr = getelementptr i64 addrspace(3)* %in, i32 4
|
||||
%val = load i64 addrspace(3)* %ptr, align 4
|
||||
store i64 %val, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: @load_lds_i64_align_4_with_split_offset
|
||||
; This tests the case where the lo offset fits in 8 bits, but the hi offset needs 9 bits.
|
||||
; SI: DS_READ2_B32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}}, 0x0, 0x1
|
||||
; SI: S_ENDPGM
|
||||
; Loads an i64 with 4-byte alignment from addrspace(3) at %in + 255 dwords,
; so the two halves sit at dword offsets 255 and 256, and stores it to %out.
define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
|
||||
%ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
|
||||
%ptr255 = getelementptr i32 addrspace(3)* %ptr, i32 255
|
||||
%ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
|
||||
%val = load i64 addrspace(3)* %ptri64, align 4
|
||||
store i64 %val, i64 addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: Need to fix this case.
|
||||
; define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
|
||||
; %val = load i64 addrspace(3)* %in, align 1
|
||||
; store i64 %val, i64 addrspace(1)* %out, align 8
|
||||
; ret void
|
||||
; }
|
||||
|
||||
; SI-LABEL: @store_lds_i64_align_4
|
||||
; SI: DS_WRITE2_B32
|
||||
; SI: S_ENDPGM
|
||||
; Stores the i64 argument %val to addrspace(3) pointer %out with 4-byte
; alignment and no constant offset.
define void @store_lds_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
|
||||
store i64 %val, i64 addrspace(3)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: @store_lds_i64_align_4_with_offset
|
||||
; FIXME: This check is not enabled. DS_WRITE2_B32 v[{{[0-9]+}}], v[{{[0-9]+}}], v{{[0-9]}}, 0x8, 0x9
|
||||
; SI: S_ENDPGM
|
||||
; Stores i64 0 with 4-byte alignment to addrspace(3) at %out + 4 i64
; elements (32 bytes, i.e. dword offsets 8 and 9).
define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
|
||||
%ptr = getelementptr i64 addrspace(3)* %out, i32 4
|
||||
store i64 0, i64 addrspace(3)* %ptr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: @store_lds_i64_align_4_with_split_offset
|
||||
; This tests the case where the lo offset fits in 8 bits, but the hi offset needs 9 bits.
|
||||
; FIXME: This check is not enabled. DS_WRITE2_B32 v[{{[0-9]+}}], v[{{[0-9]+}}], v{{[0-9]}}, 0x0, 0x1
|
||||
; SI: S_ENDPGM
|
||||
; Stores i64 0 with 4-byte alignment to addrspace(3) at %out + 255 dwords, so
; the two halves sit at dword offsets 255 and 256. The store must target
; %ptri64; storing to %out would leave the offset computation dead and would
; not exercise the split-offset case.
define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
  %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
  %ptr255 = getelementptr i32 addrspace(3)* %ptr, i32 255
  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
  store i64 0, i64 addrspace(3)* %ptri64, align 4
  ret void
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user