mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-29 10:32:47 +00:00
R600: Add support for global vector stores with elements less than 32-bits
Tested-by: Aaron Watry <awatry@gmail.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188520 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ec484277dd
commit
4c52d450dc
@ -67,6 +67,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
||||
setOperationAction(ISD::STORE, MVT::f64, Promote);
|
||||
AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
|
||||
|
||||
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
|
||||
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
|
||||
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
|
||||
// XXX: This can be changed to Custom, once ExpandVectorStores can
|
||||
// handle 64-bit stores.
|
||||
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
|
||||
|
||||
setOperationAction(ISD::LOAD, MVT::f32, Promote);
|
||||
AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
|
||||
|
||||
@ -187,6 +194,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
||||
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
|
||||
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
|
||||
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
|
||||
case ISD::STORE: return LowerVectorStore(Op, DAG);
|
||||
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
|
||||
}
|
||||
return Op;
|
||||
@ -487,6 +495,59 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
|
||||
return DAG.getMergeValues(Ops, 2, DL);
|
||||
}
|
||||
|
||||
/// \brief Lower a small vector store by packing its elements into a single
/// integer of the same total bit width and storing that integer instead.
///
/// Each element of the stored value is zero-extended or truncated to the
/// packed integer type, masked down to the memory element width, shifted to
/// its slot (element i occupies bits [i * EltBits, (i + 1) * EltBits)) and
/// OR-ed into the result.
///
/// \param Op  The store node to lower; it must be a StoreSDNode.
/// \param DAG The selection DAG used to create the replacement nodes.
/// \returns The packed integer store, or an empty SDValue when the memory type
///          is not a vector or is wider than 32 bits, so that callers can
///          detect "not handled" through a null node.
SDValue AMDGPUTargetLowering::LowerVectorStore(const SDValue &Op,
                                               SelectionDAG &DAG) const {
  // Op is required to be a store here. cast<> asserts that, whereas dyn_cast<>
  // would hand back a null pointer that is dereferenced on the next line.
  StoreSDNode *Store = cast<StoreSDNode>(Op);
  EVT MemVT = Store->getMemoryVT();
  unsigned MemBits = MemVT.getSizeInBits();

  // Byte stores are really expensive, so if possible, try to pack a
  // 32-bit vector truncating store into an i32 store.
  // XXX: We could also optimize other vector bitwidths.
  if (!MemVT.isVector() || MemBits > 32) {
    return SDValue();
  }

  SDLoc DL(Op);
  const SDValue &Value = Store->getValue();
  const SDValue &Ptr = Store->getBasePtr();
  EVT MemEltVT = MemVT.getVectorElementType();
  unsigned MemEltBits = MemEltVT.getSizeInBits();
  unsigned MemNumElements = MemVT.getVectorNumElements();

  // The element type of the value being stored does not change per element,
  // so look it up once instead of on every loop iteration.
  EVT ElemVT = Value.getValueType().getVectorElementType();

  // Integer type that is exactly as wide as the whole in-memory vector.
  EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemBits);

  // Mask that keeps only the bits of one in-memory element.
  SDValue Mask;
  switch (MemEltBits) {
  case 8:
    Mask = DAG.getConstant(0xFF, PackedVT);
    break;
  case 16:
    Mask = DAG.getConstant(0xFFFF, PackedVT);
    break;
  default:
    llvm_unreachable("Cannot lower this vector store");
  }

  SDValue PackedValue;
  for (unsigned i = 0; i < MemNumElements; ++i) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
                              DAG.getConstant(i, MVT::i32));
    Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
    // Clear everything above the memory element width so that neighbouring
    // slots are not polluted once the element is shifted into place.
    Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
    SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
    Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);

    if (i == 0) {
      PackedValue = Elt;
    } else {
      PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
    }
  }

  // Reuse the original store's pointer info as a whole. Rebuilding it from the
  // IR value alone would silently drop any offset recorded on the memory
  // operand.
  return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
                      Store->getPointerInfo(),
                      Store->isVolatile(), Store->isNonTemporal(),
                      Store->getAlignment());
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Helper functions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -51,6 +51,10 @@ protected:
|
||||
void AnalyzeFormalArguments(CCState &State,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins) const;
|
||||
|
||||
/// \brief Lower vector stores by merging the vector elements into an integer
|
||||
/// of the same bitwidth.
|
||||
SDValue LowerVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
|
||||
|
||||
public:
|
||||
AMDGPUTargetLowering(TargetMachine &TM);
|
||||
|
||||
|
@ -1011,10 +1011,15 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue Value = Op.getOperand(1);
|
||||
SDValue Ptr = Op.getOperand(2);
|
||||
|
||||
SDValue Result = AMDGPUTargetLowering::LowerVectorStore(Op, DAG);
|
||||
if (Result.getNode()) {
|
||||
return Result;
|
||||
}
|
||||
|
||||
if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
|
||||
if (StoreNode->isTruncatingStore()) {
|
||||
EVT VT = Value.getValueType();
|
||||
assert(VT == MVT::i32);
|
||||
assert(VT.bitsLE(MVT::i32));
|
||||
EVT MemVT = StoreNode->getMemoryVT();
|
||||
SDValue MaskConstant;
|
||||
if (MemVT == MVT::i8) {
|
||||
@ -1571,6 +1576,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
case AMDGPUISD::EXPORT: {
|
||||
SDValue Arg = N->getOperand(1);
|
||||
if (Arg.getOpcode() != ISD::BUILD_VECTOR)
|
||||
|
@ -63,6 +63,49 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-CHECK: @store_v2i8
|
||||
; EG-CHECK: MEM_RAT MSKOR
|
||||
; EG-CHECK-NOT: MEM_RAT MSKOR
|
||||
; SI-CHECK: @store_v2i8
|
||||
; SI-CHECK: BUFFER_STORE_BYTE
|
||||
; SI-CHECK: BUFFER_STORE_BYTE
|
||||
define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
|
||||
entry:
|
||||
%0 = trunc <2 x i32> %in to <2 x i8>
|
||||
store <2 x i8> %0, <2 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; EG-CHECK: @store_v2i16
|
||||
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
|
||||
; CM-CHECK: @store_v2i16
|
||||
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
|
||||
; SI-CHECK: @store_v2i16
|
||||
; SI-CHECK: BUFFER_STORE_DWORD
|
||||
define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
|
||||
entry:
|
||||
%0 = trunc <2 x i32> %in to <2 x i16>
|
||||
store <2 x i16> %0, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-CHECK: @store_v4i8
|
||||
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
|
||||
; CM-CHECK: @store_v4i8
|
||||
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
|
||||
; SI-CHECK: @store_v4i8
|
||||
; SI-CHECK: BUFFER_STORE_BYTE
|
||||
; SI-CHECK: BUFFER_STORE_BYTE
|
||||
; SI-CHECK: BUFFER_STORE_BYTE
|
||||
; SI-CHECK: BUFFER_STORE_BYTE
|
||||
define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
|
||||
entry:
|
||||
%0 = trunc <4 x i32> %in to <4 x i8>
|
||||
store <4 x i8> %0, <4 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; floating-point store
|
||||
; EG-CHECK: @store_f32
|
||||
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
|
||||
@ -76,6 +119,25 @@ define void @store_f32(float addrspace(1)* %out, float %in) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-CHECK: @store_v4i16
|
||||
; EG-CHECK: MEM_RAT MSKOR
|
||||
; EG-CHECK: MEM_RAT MSKOR
|
||||
; EG-CHECK: MEM_RAT MSKOR
|
||||
; EG-CHECK: MEM_RAT MSKOR
|
||||
; EG-CHECK-NOT: MEM_RAT MSKOR
|
||||
; SI-CHECK: @store_v4i16
|
||||
; SI-CHECK: BUFFER_STORE_SHORT
|
||||
; SI-CHECK: BUFFER_STORE_SHORT
|
||||
; SI-CHECK: BUFFER_STORE_SHORT
|
||||
; SI-CHECK: BUFFER_STORE_SHORT
|
||||
; SI-CHECK-NOT: BUFFER_STORE_BYTE
|
||||
define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
|
||||
entry:
|
||||
%0 = trunc <4 x i32> %in to <4 x i16>
|
||||
store <4 x i16> %0, <4 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; vec2 floating-point stores
|
||||
; EG-CHECK: @store_v2f32
|
||||
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
|
||||
|
Loading…
Reference in New Issue
Block a user