From 0ce371082565330672c276f76297f46b362d74b7 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Fri, 14 Aug 2009 05:08:32 +0000 Subject: [PATCH] During legalization, change Neon vdup_lane operations from shuffles to target-specific VDUPLANE nodes. This allows the subreg handling for the quad-register version to be done easily with Pats in the .td file, instead of with custom code in ARMISelDAGToDAG.cpp. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78993 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 41 ------------------------------ lib/Target/ARM/ARMISelLowering.cpp | 8 +++++- lib/Target/ARM/ARMISelLowering.h | 4 +-- lib/Target/ARM/ARMInstrNEON.td | 41 +++++++++++++++++------------- 4 files changed, 33 insertions(+), 61 deletions(-) diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 9ca80ae1b91..e7985fdae3c 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -36,9 +36,6 @@ using namespace llvm; -static const unsigned arm_dsubreg_0 = 5; -static const unsigned arm_dsubreg_1 = 6; - //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. @@ -1307,44 +1304,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { MVT::Other, Ops, 3); } - case ISD::VECTOR_SHUFFLE: { - EVT VT = Op.getValueType(); - - // Match 128-bit splat to VDUPLANEQ. (This could be done with a Pat in - // ARMInstrNEON.td but it is awkward because the shuffle mask needs to be - // transformed first into a lane number and then to both a subregister - // index and an adjusted lane number.) If the source operand is a - // SCALAR_TO_VECTOR, leave it so it will be matched later as a VDUP. 
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - if (VT.is128BitVector() && SVOp->isSplat() && - Op.getOperand(0).getOpcode() != ISD::SCALAR_TO_VECTOR && - Op.getOperand(1).getOpcode() == ISD::UNDEF) { - unsigned LaneVal = SVOp->getSplatIndex(); - - EVT HalfVT; - unsigned Opc = 0; - switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { - default: llvm_unreachable("unhandled VDUP splat type"); - case MVT::i8: Opc = ARM::VDUPLN8q; HalfVT = MVT::v8i8; break; - case MVT::i16: Opc = ARM::VDUPLN16q; HalfVT = MVT::v4i16; break; - case MVT::i32: Opc = ARM::VDUPLN32q; HalfVT = MVT::v2i32; break; - case MVT::f32: Opc = ARM::VDUPLNfq; HalfVT = MVT::v2f32; break; - } - - // The source operand needs to be changed to a subreg of the original - // 128-bit operand, and the lane number needs to be adjusted accordingly. - unsigned NumElts = VT.getVectorNumElements() / 2; - unsigned SRVal = (LaneVal < NumElts ? arm_dsubreg_0 : arm_dsubreg_1); - SDValue SR = CurDAG->getTargetConstant(SRVal, MVT::i32); - SDValue NewLane = CurDAG->getTargetConstant(LaneVal % NumElts, MVT::i32); - SDNode *SubReg = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG, - dl, HalfVT, N->getOperand(0), SR); - return CurDAG->SelectNodeTo(N, Opc, VT, SDValue(SubReg, 0), NewLane); - } - - break; - } - case ARMISD::VLD2D: { SDValue MemAddr, MemUpdate, MemOpc; if (!SelectAddrMode6(Op, N->getOperand(1), MemAddr, MemUpdate, MemOpc)) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index a36fdbf2474..3c40192ea57 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -477,7 +477,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; - case ARMISD::VDUPLANEQ: return "ARMISD::VDUPLANEQ"; + case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; case ARMISD::VLD2D: return
"ARMISD::VLD2D"; case ARMISD::VLD3D: return "ARMISD::VLD3D"; case ARMISD::VLD4D: return "ARMISD::VLD4D"; @@ -2447,6 +2447,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // of inconsistencies between legalization and selection. // FIXME: floating-point vectors should be canonicalized to integer vectors // of the same time so that they get CSEd properly. + if (SVN->isSplat()) { + int Lane = SVN->getSplatIndex(); + if (Lane != 0) + return DAG.getNode(ARMISD::VDUPLANE, dl, VT, SVN->getOperand(0), + DAG.getConstant(Lane, MVT::i32)); + } if (isVREVMask(SVN, 64)) return DAG.getNode(ARMISD::VREV64, dl, VT, SVN->getOperand(0)); if (isVREVMask(SVN, 32)) diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 328959a24d9..d98b6ef9878 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -115,8 +115,8 @@ namespace llvm { VGETLANEu, // zero-extend vector extract element VGETLANEs, // sign-extend vector extract element - // Vector duplicate lane (128-bit result only; 64-bit is a shuffle) - VDUPLANEQ, // splat a lane from a 64-bit vector to a 128-bit vector + // Vector duplicate lane: + VDUPLANE, // Vector load/store with (de)interleaving VLD2D, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 7654be7b3fd..acd6533c27f 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -65,8 +65,11 @@ def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>, def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; -def NEONvduplaneq : SDNode<"ARMISD::VDUPLANEQ", - SDTypeProfile<1, 2, [SDTCisVT<2, i32>]>>; +// VDUPLANE can produce a quad-register result from a double-register source, +// so the result is not constrained to match the source. 
+def NEONvduplane : SDNode<"ARMISD::VDUPLANE", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisVT<2, i32>]>>; def SDTARMVLD2 : SDTypeProfile<2, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; def SDTARMVLD3 : SDTypeProfile<3, 1, [SDTCisSameAs<0, 1>, @@ -1781,31 +1784,18 @@ def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src), // VDUP : Vector Duplicate Lane (from scalar to all elements) -def SHUFFLE_get_splat_lane : SDNodeXForm<vector_shuffle, [{ - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - return CurDAG->getTargetConstant(SVOp->getSplatIndex(), MVT::i32); -}]>; - -def splat_lane : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - return SVOp->isSplat(); -}], SHUFFLE_get_splat_lane>; - class VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty> : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0, (outs DPR:$dst), (ins DPR:$src, lane_cst:$lane), NoItinerary, !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", - [(set DPR:$dst, (Ty (splat_lane:$lane DPR:$src, undef)))]>; + [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>; -// vector_shuffle requires that the source and destination types match, so -// VDUP to a 128-bit result uses a target-specific VDUPLANEQ node.
class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType ResTy, ValueType OpTy> : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0, (outs QPR:$dst), (ins DPR:$src, lane_cst:$lane), NoItinerary, !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", - [(set QPR:$dst, (ResTy (NEONvduplaneq (OpTy DPR:$src), imm:$lane)))]>; + [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>; def VDUPLN8d : VDUPLND<0b00, 0b01, "vdup.8", v8i8>; def VDUPLN16d : VDUPLND<0b00, 0b10, "vdup.16", v4i16>; @@ -1816,6 +1806,23 @@ def VDUPLN16q : VDUPLNQ<0b00, 0b10, "vdup.16", v8i16, v4i16>; def VDUPLN32q : VDUPLNQ<0b01, 0b00, "vdup.32", v4i32, v2i32>; def VDUPLNfq : VDUPLNQ<0b01, 0b00, "vdup.32", v4f32, v2f32>; +def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), + (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i8_reg imm:$lane))), + (SubReg_i8_lane imm:$lane)))>; +def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)), + (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; +def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)), + (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; +def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), + (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + def VDUPfdf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0, (outs DPR:$dst), (ins SPR:$src), NoItinerary, "vdup.32\t$dst, ${src:lane}", "",