From d08a9303614355cfdcac5f2c27c09ce809565423 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Mon, 26 Aug 2013 15:06:04 +0000
Subject: [PATCH] R600: Add support for vector local memory loads

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189226 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDGPUISelLowering.cpp | 42 ++++++++++++++++++++++++++++++++++++++++++
 lib/Target/R600/AMDGPUISelLowering.h   |  2 ++
 lib/Target/R600/R600ISelLowering.cpp   |  8 ++++++++
 lib/Target/R600/SIISelLowering.cpp     | 17 +++++++++++++++++
 test/CodeGen/R600/load.ll              | 14 ++++++++++++++
 5 files changed, 83 insertions(+)

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 88867b6e9da..1237323ee89 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -424,6 +424,48 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
   return Op;
 }
 
+/// \brief Split a vector load into one scalar load per element.
+///
+/// \p Op must be a LoadSDNode whose memory type is a vector. Element i is
+/// read from the base pointer plus i times the element size in bytes, and
+/// every scalar load reuses the extension type, chain, volatile and
+/// non-temporal flags and alignment value of the original load.
+///
+/// \returns a BUILD_VECTOR of the loaded elements with the value type of
+/// \p Op. No chain is returned.
+SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
+                                              SelectionDAG &DAG) const {
+  // cast<> asserts if Op is not a load; dyn_cast<> would require a null
+  // check before the pointer is used below.
+  LoadSDNode *Load = cast<LoadSDNode>(Op);
+  EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
+  EVT EltVT = Op.getValueType().getVectorElementType();
+  EVT PtrVT = Load->getBasePtr().getValueType();
+  unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
+  // Distance in bytes between consecutive elements; loop invariant.
+  unsigned MemEltSize = MemEltVT.getSizeInBits() / 8;
+  SmallVector<SDValue, 8> Loads;
+  SDLoc SL(Op);
+
+  for (unsigned i = 0; i != NumElts; ++i) {
+    unsigned Offset = i * MemEltSize;
+    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
+                              DAG.getConstant(Offset, PtrVT));
+    // Describe this element's own location rather than labelling every
+    // scalar load as an access at the original load's address.
+    MachinePointerInfo PtrInfo = Load->getPointerInfo().getWithOffset(Offset);
+    Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
+                                   Load->getChain(), Ptr, PtrInfo, MemEltVT,
+                                   Load->isVolatile(), Load->isNonTemporal(),
+                                   Load->getAlignment()));
+  }
+  // NOTE(review): the chain results of the scalar loads are dropped here and
+  // the callers in this patch appear to pair the result with the original
+  // chain; confirm later memory operations cannot move ahead of these loads.
+  return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), Loads.data(),
+                     Loads.size());
+}
+
 SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
                                                SelectionDAG &DAG) const {
   StoreSDNode *Store = dyn_cast(Op);
diff --git 
a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index f739aed9783..75ac4c2f183 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -50,6 +50,8 @@ protected: unsigned Reg, EVT VT) const; SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const; + /// \brief Split a vector load into multiple scalar loads. + SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; bool isHWTrueValue(SDValue Op) const; bool isHWFalseValue(SDValue Op) const; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 9bc8e8a818f..f0242b86c0b 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1155,6 +1155,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const SDValue Ptr = Op.getOperand(1); SDValue LoweredLoad; + if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) { + SDValue MergedValues[2] = { + SplitVectorLoad(Op, DAG), + Chain + }; + return DAG.getMergeValues(MergedValues, 2, DL); + } + int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); if (ConstantBlock > -1) { SDValue Result; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 9cbba6c54ea..f196059b894 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -66,6 +66,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::BITCAST, MVT::i128, Legal); + // We need to custom lower vector loads from local memory + setOperationAction(ISD::LOAD, MVT::v2i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); @@ -368,6 +372,19 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG 
&DAG) const { switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); + case ISD::LOAD: { + LoadSDNode *Load = dyn_cast(Op); + if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && + Op.getValueType().isVector()) { + SDValue MergedValues[2] = { + SplitVectorLoad(Op, DAG), + Load->getChain() + }; + return DAG.getMergeValues(MergedValues, 2, SDLoc(Op)); + } else { + return SDValue(); + } + } case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG); diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index ba8250650fc..6cf1af71aea 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -516,3 +516,17 @@ entry: store float %0, float addrspace(1)* %out ret void } + +; load a v2f32 value from the local address space +; R600-CHECK: @load_v2f32_local +; R600-CHECK: LDS_READ_RET +; R600-CHECK: LDS_READ_RET +; SI-CHECK: @load_v2f32_local +; SI-CHECK: DS_READ_B32 +; SI-CHECK: DS_READ_B32 +define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) { +entry: + %0 = load <2 x float> addrspace(3)* %in + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +}