From cdfc3c82a72eeaa604a50426ffc51c21587e0406 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 17 Apr 2006 22:45:49 +0000 Subject: [PATCH] Use movss to insert_vector_elt(v, s, 0). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27782 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 51 +++++++++++++++++++----------- lib/Target/X86/X86InstrSSE.td | 5 +++ 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8fba7547d08..b7b10f1b759 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3015,28 +3015,41 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { N2 = DAG.getConstant(cast(N2)->getValue(), MVT::i32); return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); } else if (MVT::getSizeInBits(BaseVT) == 32) { - // Use two pinsrw instructions to insert a 32 bit value. unsigned Idx = cast(N2)->getValue(); - Idx <<= 1; - if (MVT::isFloatingPoint(N1.getValueType())) { - if (N1.getOpcode() == ISD::LOAD) { - // Just load directly from f32mem to R32. - N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1), - N1.getOperand(2)); - } else { - N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); - N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); - N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, - DAG.getConstant(0, MVT::i32)); + if (Idx == 0) { + // Use a movss. + N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); + MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + std::vector MaskVec; + MaskVec.push_back(DAG.getConstant(4, BaseVT)); + for (unsigned i = 1; i <= 3; ++i) + MaskVec.push_back(DAG.getConstant(i, BaseVT)); + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, + DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec)); + } else { + // Use two pinsrw instructions to insert a 32 bit value. + Idx <<= 1; + if (MVT::isFloatingPoint(N1.getValueType())) { + if (N1.getOpcode() == ISD::LOAD) { + // Just load directly from f32mem to R32. + N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1), + N1.getOperand(2)); + } else { + N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); + N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); + N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, + DAG.getConstant(0, MVT::i32)); + } } + N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); + N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, + DAG.getConstant(Idx, MVT::i32)); + N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); + N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, + DAG.getConstant(Idx+1, MVT::i32)); + return DAG.getNode(ISD::BIT_CONVERT, VT, N0); } - N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); - N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, - DAG.getConstant(Idx, MVT::i32)); - N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); - N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, - DAG.getConstant(Idx+1, MVT::i32)); - return DAG.getNode(ISD::BIT_CONVERT, VT, N0); } return SDOperand(); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b1aa80f95ea..e3a6aac0405 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2414,6 +2414,11 @@ def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (loadv2i64 addr:$src)), (undef), MOVSLDUP_shuffle_mask)), (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; +// vector_shuffle v1, v2 <4, 1, 2, 3> +def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVS_shuffle_mask)), + (MOVLPSrr VR128:$src1, VR128:$src2)>; + // 128-bit logical shifts def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>,