From 1201cc156f28c0f4f14cea94d2735632cbd3d6f4 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 28 Aug 2014 03:52:45 +0000 Subject: [PATCH] [x86] Inline an SSE4 helper function for INSERT_VECTOR_ELT lowering, no functionality changed. Separating this into two functions wasn't helping. There was a decent amount of boilerplate duplicated, and some subsequent refactorings here will pull even more common code out. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216644 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 103 +++++++++++++---------------- 1 file changed, 45 insertions(+), 58 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 015d2c9ccc0..e6c00719680 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -10653,59 +10653,6 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return SDValue(); } -static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getSimpleValueType(); - MVT EltVT = VT.getVectorElementType(); - SDLoc dl(Op); - - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - SDValue N2 = Op.getOperand(2); - - if (!VT.is128BitVector()) - return SDValue(); - - if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && - isa<ConstantSDNode>(N2)) { - unsigned Opc; - if (VT == MVT::v8i16) { - Opc = X86ISD::PINSRW; - } else { - assert(VT == MVT::v16i8); - Opc = X86ISD::PINSRB; - } - - // Transform it so it match pinsr{b,w} which expects a GR32 as its second - // argument. - if (N1.getValueType() != MVT::i32) - N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); - if (N2.getValueType() != MVT::i32) - N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue()); - return DAG.getNode(Opc, dl, VT, N0, N1, N2); - } - - if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) { - // Bits [7:6] of the constant are the source select. This will always be - // zero here.
The DAG Combiner may combine an extract_elt index into these - // bits. For example (insert (extract, 3), 2) could be matched by putting - // the '3' into bits [7:6] of X86ISD::INSERTPS. - // Bits [5:4] of the constant are the destination select. This is the - // value of the incoming immediate. - // Bits [3:0] of the constant are the zero mask. The DAG Combiner may - // combine either bitwise AND or insert of float 0.0 to set these bits. - N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4); - // Create this as a scalar to vector.. - N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); - return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2); - } - - if ((EltVT == MVT::i32 || EltVT == MVT::i64) && isa<ConstantSDNode>(N2)) { - // PINSR* works with constant index. - return Op; - } - return SDValue(); -} - /// Insert one bit to mask vector, like v16i1 or v8i1. /// AVX-512 feature. SDValue @@ -10744,7 +10691,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); - + if (EltVT == MVT::i1) return InsertBitToMaskVector(Op, DAG); @@ -10753,8 +10700,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); - // If this is a 256-bit vector result, first extract the 128-bit vector, - // insert the element into the extracted half and then place it back. + // If the vector is wider than 128 bits, extract the 128-bit subvector, insert + // into that, and then insert the subvector back into the result.
if (VT.is256BitVector() || VT.is512BitVector()) { if (!isa<ConstantSDNode>(N2)) return SDValue(); @@ -10774,8 +10721,48 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { return Insert128BitVector(N0, V, IdxVal, DAG, dl); } - if (Subtarget->hasSSE41()) - return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG); + if (Subtarget->hasSSE41()) { + if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && + isa<ConstantSDNode>(N2)) { + unsigned Opc; + if (VT == MVT::v8i16) { + Opc = X86ISD::PINSRW; + } else { + assert(VT == MVT::v16i8); + Opc = X86ISD::PINSRB; + } + + // Transform it so it match pinsr{b,w} which expects a GR32 as its second + // argument. + if (N1.getValueType() != MVT::i32) + N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); + if (N2.getValueType() != MVT::i32) + N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue()); + return DAG.getNode(Opc, dl, VT, N0, N1, N2); + } + + if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) { + // Bits [7:6] of the constant are the source select. This will always be + // zero here. The DAG Combiner may combine an extract_elt index into + // these + // bits. For example (insert (extract, 3), 2) could be matched by + // putting + // the '3' into bits [7:6] of X86ISD::INSERTPS. + // Bits [5:4] of the constant are the destination select. This is the + // value of the incoming immediate. + // Bits [3:0] of the constant are the zero mask. The DAG Combiner may + // combine either bitwise AND or insert of float 0.0 to set these bits. + N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4); + // Create this as a scalar to vector.. + N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); + return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2); + } + + if ((EltVT == MVT::i32 || EltVT == MVT::i64) && isa<ConstantSDNode>(N2)) { + // PINSR* works with constant index. + return Op; + } + } if (EltVT == MVT::i8) return SDValue();