From c1d287b4b73487b6ab094a253a7357addc1d8b84 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Fri, 14 Aug 2009 05:13:08 +0000 Subject: [PATCH] Create a new ARM-specific DAG node, VDUP, to represent a splat from a scalar_to_vector. Generate these VDUP nodes during legalization instead of trying to recognize the pattern during selection. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78994 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 10 +++++++--- lib/Target/ARM/ARMISelLowering.h | 3 ++- lib/Target/ARM/ARMInstrNEON.td | 32 ++++++++++-------------------- 3 files changed, 19 insertions(+), 26 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 3c40192ea57..641476c090a 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -477,6 +477,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; + case ARMISD::VDUP: return "ARMISD::VDUP"; case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; case ARMISD::VLD2D: return "ARMISD::VLD2D"; case ARMISD::VLD3D: return "ARMISD::VLD3D"; @@ -2449,9 +2450,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // of the same time so that they get CSEd properly. if (SVN->isSplat()) { int Lane = SVN->getSplatIndex(); - if (Lane != 0) - return DAG.getNode(ARMISD::VDUPLANE, dl, VT, SVN->getOperand(0), - DAG.getConstant(Lane, MVT::i32)); + SDValue Op0 = SVN->getOperand(0); + if (Lane == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) { + return DAG.getNode(ARMISD::VDUP, dl, VT, Op0.getOperand(0)); + } + return DAG.getNode(ARMISD::VDUPLANE, dl, VT, SVN->getOperand(0), + DAG.getConstant(Lane, MVT::i32)); } if (isVREVMask(SVN, 64)) return DAG.getNode(ARMISD::VREV64, dl, VT, SVN->getOperand(0)); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index d98b6ef9878..88dddf2289e 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -115,7 +115,8 @@ namespace llvm { VGETLANEu, // zero-extend vector extract element VGETLANEs, // sign-extend vector extract element - // Vector duplicate lane: + // Vector duplicate: + VDUP, VDUPLANE, // Vector load/store with (de)interleaving diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index acd6533c27f..fad3308c0d9 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -65,6 +65,8 @@ def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>, def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; +def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; + // VDUPLANE can produce a quad-register result from a double-register source, // so the result is not constrained to match the source. def NEONvduplane : SDNode<"ARMISD::VDUPLANE", @@ -1747,20 +1749,14 @@ def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), // VDUP : Vector Duplicate (from ARM core register to all elements) -def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - ShuffleVectorSDNode *SVOp = cast(N); - return SVOp->isSplat() && SVOp->getSplatIndex() == 0; -}]>; - class VDUPD opcod1, bits<2> opcod3, string asmSize, ValueType Ty> : NVDup; + [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>; class VDUPQ opcod1, bits<2> opcod3, string asmSize, ValueType Ty> : NVDup; + [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>; def VDUP8d : VDUPD<0b11101100, 0b00, ".8", v8i8>; def VDUP16d : VDUPD<0b11101000, 0b01, ".16", v4i16>; @@ -1771,16 +1767,12 @@ def VDUP32q : VDUPQ<0b11101010, 0b00, ".32", v4i32>; def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src), NoItinerary, "vdup", ".32\t$dst, $src", - [(set DPR:$dst, (v2f32 (splat_lo - (scalar_to_vector - (f32 (bitconvert GPR:$src))), - undef)))]>; + [(set DPR:$dst, (v2f32 (NEONvdup + (f32 (bitconvert GPR:$src)))))]>; def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src), NoItinerary, "vdup", ".32\t$dst, $src", - [(set QPR:$dst, (v4f32 (splat_lo - (scalar_to_vector - (f32 (bitconvert GPR:$src))), - undef)))]>; + [(set QPR:$dst, (v4f32 (NEONvdup + (f32 (bitconvert GPR:$src)))))]>; // VDUP : Vector Duplicate Lane (from scalar to all elements) @@ -1826,16 +1818,12 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), def VDUPfdf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0, (outs DPR:$dst), (ins SPR:$src), NoItinerary, "vdup.32\t$dst, ${src:lane}", "", - [(set DPR:$dst, (v2f32 (splat_lo - (scalar_to_vector SPR:$src), - undef)))]>; + [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>; def VDUPfqf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 1, 0, (outs QPR:$dst), (ins SPR:$src), NoItinerary, "vdup.32\t$dst, ${src:lane}", "", - [(set QPR:$dst, (v4f32 (splat_lo - (scalar_to_vector SPR:$src), - undef)))]>; + [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; // VMOVN : Vector Narrowing Move defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, "vmovn.i",