From a599bff101095e528198ae85739fe8b97ffba82b Mon Sep 17 00:00:00 2001
From: Bob Wilson
Date: Tue, 4 Aug 2009 00:36:16 +0000
Subject: [PATCH] Lower Neon VLD* intrinsics to custom DAG nodes, and manually
 allocate the results to fixed registers.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78025 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp | 59 ++++++++++++++++++++++++++++++
 lib/Target/ARM/ARMISelLowering.h   |  8 +++-
 lib/Target/ARM/ARMInstrNEON.td     |  8 ++++
 3 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index deb4df0df29..eb7754745cb 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -323,6 +323,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
 
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
 
   setOperationAction(ISD::SETCC, MVT::i32, Expand);
   setOperationAction(ISD::SETCC, MVT::f32, Expand);
@@ -463,6 +464,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
   case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
   case ARMISD::VDUPLANEQ: return "ARMISD::VDUPLANEQ";
+  case ARMISD::VLD2D: return "ARMISD::VLD2D";
+  case ARMISD::VLD3D: return "ARMISD::VLD3D";
+  case ARMISD::VLD4D: return "ARMISD::VLD4D";
   }
 }
 
@@ -1318,6 +1322,60 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
 }
 
+static SDValue LowerNeonVLDIntrinsic(SDValue Op, SelectionDAG &DAG,
+                                     unsigned Opcode, unsigned NumVecs) {
+  SDNode *Node = Op.getNode();
+  MVT VT = Node->getValueType(0);
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (!VT.is64BitVector())
+    return SDValue(); // unimplemented
+
+  SDValue Ops[] = { Node->getOperand(0),
+                    Node->getOperand(2) };
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDValue Result = DAG.getNode(Opcode, dl, Tys, Ops, 2);
+
+  static const unsigned VLDRegs[] = {
+    ARM::D0, ARM::D1, ARM::D2, ARM::D3
+  };
+
+  SmallVector<SDValue, 5> ResultVals;
+  SDValue Chain = Result.getValue(0);
+  SDValue Flag = Result.getValue(1);
+  for (unsigned N = 0; N < NumVecs; ++N) {
+    Chain = DAG.getCopyFromReg(Chain, dl, VLDRegs[N], VT, Flag).getValue(1);
+    ResultVals.push_back(Chain.getValue(0));
+    Flag = Chain.getValue(2);
+  }
+  ResultVals.push_back(Chain);
+  return DAG.getNode(ISD::MERGE_VALUES, dl, Node->getVTList(),
+                     ResultVals.data(), NumVecs + 1);
+}
+
+SDValue
+ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
+  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+  switch (IntNo) {
+  case Intrinsic::arm_neon_vld2i:
+  case Intrinsic::arm_neon_vld2f:
+    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD2D, 2);
+  case Intrinsic::arm_neon_vld3i:
+  case Intrinsic::arm_neon_vld3f:
+    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD3D, 3);
+  case Intrinsic::arm_neon_vld4i:
+  case Intrinsic::arm_neon_vld4f:
+    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD4D, 4);
+  case Intrinsic::arm_neon_vst2i:
+  case Intrinsic::arm_neon_vst2f:
+  case Intrinsic::arm_neon_vst3i:
+  case Intrinsic::arm_neon_vst3f:
+  case Intrinsic::arm_neon_vst4i:
+  case Intrinsic::arm_neon_vst4f:
+  default: return SDValue();  // Don't custom lower most intrinsics.
+  }
+}
+
 SDValue
 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -2354,6 +2412,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
   case ISD::RETURNADDR: break;
   case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
   case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+  case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
   case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG);
   case ISD::SHL:
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index d0806fb9c1d..f32db3ff931 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -114,7 +114,12 @@ namespace llvm {
       VGETLANEs,  // sign-extend vector extract element
 
       // Vector duplicate lane (128-bit result only; 64-bit is a shuffle)
-      VDUPLANEQ   // splat a lane from a 64-bit vector to a 128-bit vector
+      VDUPLANEQ,  // splat a lane from a 64-bit vector to a 128-bit vector
+
+      // Vector load/store with (de)interleaving
+      VLD2D,
+      VLD3D,
+      VLD4D
     };
   }
 
@@ -237,6 +242,7 @@ namespace llvm {
     SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
                             unsigned CallingConv, SelectionDAG &DAG);
     SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG);
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG);
     SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
     SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 8641d6274a7..5e8a4b5994f 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -68,6 +68,14 @@ def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
 def NEONvduplaneq : SDNode<"ARMISD::VDUPLANEQ",
                            SDTypeProfile<1, 2, [SDTCisVT<2, i32>]>>;
 
+def SDTARMVLD : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def NEONvld2d : SDNode<"ARMISD::VLD2D", SDTARMVLD,
+                       [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
+def NEONvld3d : SDNode<"ARMISD::VLD3D", SDTARMVLD,
+                       [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
+def NEONvld4d : SDNode<"ARMISD::VLD4D", SDTARMVLD,
+                       [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
+
 //===----------------------------------------------------------------------===//
 // NEON operand definitions
 //===----------------------------------------------------------------------===//
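
For reference, a minimal usage sketch (not part of the patch): the kind of source-level
code this lowering ultimately serves. The mapping from the <arm_neon.h> intrinsic to the
@llvm.arm.neon.vld2* IR intrinsics handled by LowerINTRINSIC_W_CHAIN above is assumed
here for illustration only; the patch itself changes nothing on the frontend side.

    // Hypothetical example (assumes the frontend emits an @llvm.arm.neon.vld2*
    // intrinsic for vld2_s32): after this patch the backend expands that
    // intrinsic into an ARMISD::VLD2D node and copies its two 64-bit results
    // out of the fixed registers D0/D1 via glued CopyFromReg nodes.
    #include <arm_neon.h>

    int32x2x2_t load_two_interleaved(const int32_t *p) {
      return vld2_s32(p);   // de-interleaving NEON structure load (vld2.32)
    }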