From 243fcc5a6901e75e7ca5c374e706a634593ec17f Mon Sep 17 00:00:00 2001
From: Bob Wilson
Date: Tue, 1 Sep 2009 04:26:28 +0000
Subject: [PATCH] Generate code for vld{234}_lane intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@80656 91177308-0d34-0410-b5e6-96231b3b80d8
---
Reviewer notes (text in this position is ignored when the patch is applied):

  * ARMISelDAGToDAG.cpp: adds Select() cases for arm_neon_vld2lane,
    arm_neon_vld3lane and arm_neon_vld4lane.  Each case picks
    VLD<N>LNd8 / d16 / d32 for v8i8 / v4i16 / v2f32+v2i32 and hits
    llvm_unreachable for any other type.  The machine node's operands are
    the three values produced by SelectAddrMode6, then intrinsic operands
    3..N+3 (the N input vectors followed by the lane number), then the
    chain (intrinsic operand 0).  Its results are N values of type VT plus
    MVT::Other.
  * ARMISelLowering.cpp: LowerNeonVLDLaneIntrinsic returns an empty SDValue
    for types that are not 64-bit vectors (marked "unimplemented" in the
    code); otherwise it replaces operand NumVecs+3, the lane number, with
    a TargetConstant so that it is not legalized into a register.  The
    lane operand must be a ConstantSDNode.
  * ARMInstrNEON.td: adds the VLD2LND / VLD3LND / VLD4LND instruction
    classes and their .8 / .16 / .32 defs.  Every $srcN input is tied to
    the matching $dstN output by the constraint string.
  * NEONPreAllocPass.cpp: isNEONMultiRegOp reports the new opcodes with
    FirstOpnd = 0 and NumRegs = 2, 3 or 4, alongside the existing VLD2/3/4
    opcodes.

 lib/Target/ARM/ARMISelDAGToDAG.cpp  | 57 +++++++++++++++++++++++++++++
 lib/Target/ARM/ARMISelLowering.cpp  | 26 +++++++++++++
 lib/Target/ARM/ARMInstrNEON.td      | 38 +++++++++++++++++++
 lib/Target/ARM/NEONPreAllocPass.cpp |  9 +++++
 4 files changed, 130 insertions(+)

diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 97edb973e41..87c0d11b546 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1375,6 +1375,63 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
       return CurDAG->getTargetNode(Opc, dl, ResTys, Ops, 4);
     }
 
+    case Intrinsic::arm_neon_vld2lane: {
+      SDValue MemAddr, MemUpdate, MemOpc;
+      if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+        return NULL;
+      switch (VT.getSimpleVT().SimpleTy) {
+      default: llvm_unreachable("unhandled vld2lane type");
+      case MVT::v8i8: Opc = ARM::VLD2LNd8; break;
+      case MVT::v4i16: Opc = ARM::VLD2LNd16; break;
+      case MVT::v2f32:
+      case MVT::v2i32: Opc = ARM::VLD2LNd32; break;
+      }
+      SDValue Chain = N->getOperand(0);
+      const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
+                              N->getOperand(3), N->getOperand(4),
+                              N->getOperand(5), Chain };
+      return CurDAG->getTargetNode(Opc, dl, VT, VT, MVT::Other, Ops, 7);
+    }
+
+    case Intrinsic::arm_neon_vld3lane: {
+      SDValue MemAddr, MemUpdate, MemOpc;
+      if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+        return NULL;
+      switch (VT.getSimpleVT().SimpleTy) {
+      default: llvm_unreachable("unhandled vld3lane type");
+      case MVT::v8i8: Opc = ARM::VLD3LNd8; break;
+      case MVT::v4i16: Opc = ARM::VLD3LNd16; break;
+      case MVT::v2f32:
+      case MVT::v2i32: Opc = ARM::VLD3LNd32; break;
+      }
+      SDValue Chain = N->getOperand(0);
+      const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
+                              N->getOperand(3), N->getOperand(4),
+                              N->getOperand(5), N->getOperand(6), Chain };
+      return CurDAG->getTargetNode(Opc, dl, VT, VT, VT, MVT::Other, Ops, 8);
+    }
+
+    case Intrinsic::arm_neon_vld4lane: {
+      SDValue MemAddr, MemUpdate, MemOpc;
+      if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+        return NULL;
+      switch (VT.getSimpleVT().SimpleTy) {
+      default: llvm_unreachable("unhandled vld4lane type");
+      case MVT::v8i8: Opc = ARM::VLD4LNd8; break;
+      case MVT::v4i16: Opc = ARM::VLD4LNd16; break;
+      case MVT::v2f32:
+      case MVT::v2i32: Opc = ARM::VLD4LNd32; break;
+      }
+      SDValue Chain = N->getOperand(0);
+      const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
+                              N->getOperand(3), N->getOperand(4),
+                              N->getOperand(5), N->getOperand(6),
+                              N->getOperand(7), Chain };
+      std::vector<EVT> ResTys(4, VT);
+      ResTys.push_back(MVT::Other);
+      return CurDAG->getTargetNode(Opc, dl, ResTys, Ops, 9);
+    }
+
     case Intrinsic::arm_neon_vst2: {
       SDValue MemAddr, MemUpdate, MemOpc;
       if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 10a39a71678..0e1606f950a 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1350,6 +1350,26 @@ static SDValue LowerNeonVSTIntrinsic(SDValue Op, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue LowerNeonVLDLaneIntrinsic(SDValue Op, SelectionDAG &DAG,
+                                         unsigned NumVecs) {
+  SDNode *Node = Op.getNode();
+  EVT VT = Node->getValueType(0);
+
+  if (!VT.is64BitVector())
+    return SDValue(); // unimplemented
+
+  // Change the lane number operand to be a TargetConstant; otherwise it
+  // will be legalized into a register.
+  ConstantSDNode *Lane = dyn_cast<ConstantSDNode>(Node->getOperand(NumVecs+3));
+  if (!Lane) {
+    assert(false && "vld lane number must be a constant");
+    return SDValue();
+  }
+  SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+  Ops[NumVecs+3] = DAG.getTargetConstant(Lane->getZExtValue(), MVT::i32);
+  return DAG.UpdateNodeOperands(Op, &Ops[0], Ops.size());
+}
+
 SDValue
 ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -1358,6 +1378,12 @@ ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
     return LowerNeonVLDIntrinsic(Op, DAG, 3);
   case Intrinsic::arm_neon_vld4:
     return LowerNeonVLDIntrinsic(Op, DAG, 4);
+  case Intrinsic::arm_neon_vld2lane:
+    return LowerNeonVLDLaneIntrinsic(Op, DAG, 2);
+  case Intrinsic::arm_neon_vld3lane:
+    return LowerNeonVLDLaneIntrinsic(Op, DAG, 3);
+  case Intrinsic::arm_neon_vld4lane:
+    return LowerNeonVLDLaneIntrinsic(Op, DAG, 4);
   case Intrinsic::arm_neon_vst3:
     return LowerNeonVSTIntrinsic(Op, DAG, 3);
   case Intrinsic::arm_neon_vst4:
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index c278e8b071d..6fbcf6cbff3 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -207,6 +207,44 @@ class VLD4D
 def VLD4d8 : VLD4D<"vld4.8">;
 def VLD4d16 : VLD4D<"vld4.16">;
 def VLD4d32 : VLD4D<"vld4.32">;
+
+// VLD2LN : Vector Load (single 2-element structure to one lane)
+class VLD2LND<string OpcodeStr>
+  : NLdSt<(outs DPR:$dst1, DPR:$dst2),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+          NoItinerary,
+          !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane]\\}, $addr"),
+          "$src1 = $dst1, $src2 = $dst2", []>;
+
+def VLD2LNd8 : VLD2LND<"vld2.8">;
+def VLD2LNd16 : VLD2LND<"vld2.16">;
+def VLD2LNd32 : VLD2LND<"vld2.32">;
+
+// VLD3LN : Vector Load (single 3-element structure to one lane)
+class VLD3LND<string OpcodeStr>
+  : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+           nohash_imm:$lane), NoItinerary,
+          !strconcat(OpcodeStr,
+          "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr"),
+          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
+
+def VLD3LNd8 : VLD3LND<"vld3.8">;
+def VLD3LNd16 : VLD3LND<"vld3.16">;
+def VLD3LNd32 : VLD3LND<"vld3.32">;
+
+// VLD4LN : Vector Load (single 4-element structure to one lane)
+class VLD4LND<string OpcodeStr>
+  : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+           nohash_imm:$lane), NoItinerary,
+          !strconcat(OpcodeStr,
+          "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr"),
+          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
+
+def VLD4LNd8 : VLD4LND<"vld4.8">;
+def VLD4LNd16 : VLD4LND<"vld4.16">;
+def VLD4LNd32 : VLD4LND<"vld4.32">;
 }
 
 // VST1 : Vector Store (multiple single elements)
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index f1f3b311ba4..69380130428 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -45,6 +45,9 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd,
   case ARM::VLD2d8:
   case ARM::VLD2d16:
   case ARM::VLD2d32:
+  case ARM::VLD2LNd8:
+  case ARM::VLD2LNd16:
+  case ARM::VLD2LNd32:
     FirstOpnd = 0;
     NumRegs = 2;
     return true;
@@ -52,6 +55,9 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd,
   case ARM::VLD3d8:
   case ARM::VLD3d16:
   case ARM::VLD3d32:
+  case ARM::VLD3LNd8:
+  case ARM::VLD3LNd16:
+  case ARM::VLD3LNd32:
     FirstOpnd = 0;
     NumRegs = 3;
     return true;
@@ -59,6 +65,9 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd,
   case ARM::VLD4d8:
   case ARM::VLD4d16:
   case ARM::VLD4d32:
+  case ARM::VLD4LNd8:
+  case ARM::VLD4LNd16:
+  case ARM::VLD4LNd32:
     FirstOpnd = 0;
     NumRegs = 4;
     return true;