From 15684b29552393553524171bff1913e750f390f8 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 24 Apr 2009 12:40:33 +0000 Subject: [PATCH] Revert 69952. Causes testsuite failures on linux x86-64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@69967 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/SelectionDAG.h | 7 - include/llvm/CodeGen/SelectionDAGNodes.h | 28 +- include/llvm/Target/TargetLowering.h | 6 +- include/llvm/Target/TargetSelectionDAG.td | 13 +- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 192 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 211 ++- .../SelectionDAG/LegalizeVectorTypes.cpp | 89 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 153 +- .../SelectionDAG/SelectionDAGBuild.cpp | 153 +- lib/Target/CellSPU/SPUISelLowering.cpp | 61 +- lib/Target/PowerPC/PPCISelLowering.cpp | 214 +-- lib/Target/PowerPC/PPCISelLowering.h | 12 +- lib/Target/PowerPC/PPCInstrAltivec.td | 181 +- lib/Target/X86/X86ISelLowering.cpp | 1663 ++++++++++------- lib/Target/X86/X86ISelLowering.h | 52 +- lib/Target/X86/X86InstrInfo.td | 1 - lib/Target/X86/X86InstrMMX.td | 114 +- lib/Target/X86/X86InstrSSE.td | 592 +++--- test/CodeGen/X86/vec_clear.ll | 6 +- test/CodeGen/X86/vec_shuffle-10.ll | 10 +- test/CodeGen/X86/vec_shuffle-16.ll | 12 +- test/CodeGen/X86/vec_shuffle-30.ll | 7 +- test/CodeGen/X86/vec_shuffle-31.ll | 2 +- utils/TableGen/CodeGenDAGPatterns.cpp | 42 +- utils/TableGen/CodeGenDAGPatterns.h | 6 +- 25 files changed, 2187 insertions(+), 1640 deletions(-) diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index a350d5fd93b..9ae92f388e1 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -353,13 +353,6 @@ public: SDValue getConvertRndSat(MVT VT, DebugLoc dl, SDValue Val, SDValue DTy, SDValue STy, SDValue Rnd, SDValue Sat, ISD::CvtCode Code); - - /// getVectorShuffle - Return an ISD::VECTOR_SHUFFLE node. The number of - /// elements in VT, which must be a vector type, must match the number of - /// mask elements NumElts. A negative integer mask element is treated as - /// undefined. - SDValue getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, SDValue N2, - const int *MaskElts); /// getZeroExtendInReg - Return the expression required to zero extend the Op /// value assuming it was the smaller SrcTy value. diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index a850b3fcb59..aaa39674d78 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1703,32 +1703,6 @@ public: } }; -class ShuffleVectorSDNode : public SDNode { - SDUse Ops[2]; - int *Mask; -protected: - friend class SelectionDAG; - ShuffleVectorSDNode(MVT VT, DebugLoc dl, SDValue N1, SDValue N2, int *M) - : SDNode(ISD::VECTOR_SHUFFLE, dl, getSDVTList(VT)), Mask(M) { - InitOperands(Ops, N1, N2); - } -public: - - const int * getMask() const { return Mask; } - - bool isSplat() { return isSplatMask(Mask, getValueType(0)); } - int getSplatIndex() { - assert(isSplat() && "Cannot get splat index for non-splat!"); - return Mask[0]; - } - static bool isSplatMask(const int *Mask, MVT VT); - - static bool classof(const ShuffleVectorSDNode *) { return true; } - static bool classof(const SDNode *N) { - return N->getOpcode() == ISD::VECTOR_SHUFFLE; - } -}; - class ConstantSDNode : public SDNode { const ConstantInt *Value; friend class SelectionDAG; @@ -2110,7 +2084,7 @@ public: return N->getOpcode() == ISD::CONDCODE; } }; - + /// CvtRndSatSDNode - NOTE: avoid using this node as this may disappear in the /// future and most targets don't support it. class CvtRndSatSDNode : public SDNode { diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index a5f064dcdee..477505e2f79 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -328,7 +328,7 @@ public: /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. - virtual bool isShuffleMaskLegal(const int *Mask, MVT VT) const { + virtual bool isShuffleMaskLegal(SDValue Mask, MVT VT) const { return true; } @@ -336,7 +336,9 @@ public: /// used by Targets can use this to indicate if there is a suitable /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant /// pool entry. - virtual bool isVectorClearMaskLegal(const int *Mask, MVT VT) const { + virtual bool isVectorClearMaskLegal(const std::vector &BVOps, + MVT EVT, + SelectionDAG &DAG) const { return false; } diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index 2cd29676dbf..7f39bb2f830 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -51,6 +51,15 @@ class SDTCisOpSmallerThanOp : SDTypeConstraint{ int BigOperandNum = BigOp; } +/// SDTCisIntVectorOfSameSize - This indicates that ThisOp and OtherOp are +/// vector types, and that ThisOp is the result of +/// MVT::getIntVectorWithNumElements with the number of elements +/// that ThisOp has. +class SDTCisIntVectorOfSameSize + : SDTypeConstraint { + int OtherOpNum = OtherOp; +} + /// SDTCisEltOfVec - This indicates that ThisOp is a scalar type of the same /// type as the element type of OtherOp, which is a vector type. class SDTCisEltOfVec @@ -166,8 +175,8 @@ def SDTIStore : SDTypeProfile<1, 3, [ // indexed store SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3> ]>; -def SDTVecShuffle : SDTypeProfile<1, 2, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> +def SDTVecShuffle : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisIntVectorOfSameSize<3, 0> ]>; def SDTVecExtract : SDTypeProfile<1, 2, [ // vector extract SDTCisEltOfVec<0, 1>, SDTCisPtrTy<2> diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 40807d8909b..72b3e3627ed 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5098,21 +5098,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), InVec.getValueType(), &Ops[0], Ops.size()); } - // If the invec is an UNDEF and if EltNo is a constant, create a new - // BUILD_VECTOR with undef elements and the inserted element. - if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && - isa(EltNo)) { - MVT VT = InVec.getValueType(); - MVT EVT = VT.getVectorElementType(); - unsigned NElts = VT.getVectorNumElements(); - SmallVector Ops(NElts, DAG.getUNDEF(EVT)); - unsigned Elt = cast(EltNo)->getZExtValue(); - if (Elt < Ops.size()) - Ops[Elt] = InVal; - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - InVec.getValueType(), &Ops[0], Ops.size()); - } return SDValue(); } @@ -5174,8 +5160,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // to examine the mask. if (BCNumEltsChanged) return SDValue(); - int Idx = cast(InVec)->getMask()[Elt]; - int NumElems = InVec.getValueType().getVectorNumElements(); + unsigned Idx = cast(InVec.getOperand(2). + getOperand(Elt))->getZExtValue(); + unsigned NumElems = InVec.getOperand(2).getNumOperands(); InVec = (Idx < NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); if (InVec.getOpcode() == ISD::BIT_CONVERT) InVec = InVec.getOperand(0); @@ -5222,6 +5209,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); MVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); MVT EltType = VT.getVectorElementType(); // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT @@ -5264,36 +5252,56 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } // If everything is good, we can make a shuffle operation. + MVT IndexVT = MVT::i32; if (VecIn1.getNode()) { - SmallVector Mask; + SmallVector BuildVecIndices; for (unsigned i = 0; i != NumInScalars; ++i) { if (N->getOperand(i).getOpcode() == ISD::UNDEF) { - Mask.push_back(-1); + BuildVecIndices.push_back(DAG.getUNDEF(IndexVT)); continue; } - // If extracting from the first vector, just use the index directly. SDValue Extract = N->getOperand(i); + + // If extracting from the first vector, just use the index directly. SDValue ExtVal = Extract.getOperand(1); if (Extract.getOperand(0) == VecIn1) { - Mask.push_back(cast(ExtVal)->getZExtValue()); + if (ExtVal.getValueType() == IndexVT) + BuildVecIndices.push_back(ExtVal); + else { + unsigned Idx = cast(ExtVal)->getZExtValue(); + BuildVecIndices.push_back(DAG.getConstant(Idx, IndexVT)); + } continue; } // Otherwise, use InIdx + VecSize unsigned Idx = cast(ExtVal)->getZExtValue(); - Mask.push_back(Idx+NumInScalars); + BuildVecIndices.push_back(DAG.getConstant(Idx+NumInScalars, IndexVT)); } // Add count and size info. - if (!TLI.isTypeLegal(VT) && LegalTypes) + MVT BuildVecVT = MVT::getVectorVT(IndexVT, NumElts); + if (!TLI.isTypeLegal(BuildVecVT) && LegalTypes) return SDValue(); // Return the new VECTOR_SHUFFLE node. - SDValue Ops[2]; + SDValue Ops[5]; Ops[0] = VecIn1; - Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); - return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); + if (VecIn2.getNode()) { + Ops[1] = VecIn2; + } else { + // Use an undef build_vector as input for the second operand. + std::vector UnOps(NumInScalars, + DAG.getUNDEF(EltType)); + Ops[1] = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &UnOps[0], UnOps.size()); + AddToWorkList(Ops[1].getNode()); + } + + Ops[2] = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), BuildVecVT, + &BuildVecIndices[0], BuildVecIndices.size()); + return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(), VT, Ops, 3); } return SDValue(); @@ -5313,10 +5321,8 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { - return SDValue(); - - MVT VT = N->getValueType(0); - unsigned NumElts = VT.getVectorNumElements(); + SDValue ShufMask = N->getOperand(2); + unsigned NumElts = ShufMask.getNumOperands(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5324,13 +5330,60 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { assert(N0.getValueType().getVectorNumElements() == NumElts && "Vector shuffle must be normalized in DAG"); - // FIXME: implement canonicalizations from DAG.getVectorShuffle() + // If the shuffle mask is an identity operation on the LHS, return the LHS. + bool isIdentity = true; + for (unsigned i = 0; i != NumElts; ++i) { + if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF && + cast(ShufMask.getOperand(i))->getZExtValue() != i) { + isIdentity = false; + break; + } + } + if (isIdentity) return N->getOperand(0); + + // If the shuffle mask is an identity operation on the RHS, return the RHS. + isIdentity = true; + for (unsigned i = 0; i != NumElts; ++i) { + if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF && + cast(ShufMask.getOperand(i))->getZExtValue() != + i+NumElts) { + isIdentity = false; + break; + } + } + if (isIdentity) return N->getOperand(1); + + // Check if the shuffle is a unary shuffle, i.e. one of the vectors is not + // needed at all. + bool isUnary = true; + bool isSplat = true; + int VecNum = -1; + unsigned BaseIdx = 0; + for (unsigned i = 0; i != NumElts; ++i) + if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF) { + unsigned Idx=cast(ShufMask.getOperand(i))->getZExtValue(); + int V = (Idx < NumElts) ? 0 : 1; + if (VecNum == -1) { + VecNum = V; + BaseIdx = Idx; + } else { + if (BaseIdx != Idx) + isSplat = false; + if (VecNum != V) { + isUnary = false; + break; + } + } + } + + // Normalize unary shuffle so the RHS is undef. + if (isUnary && VecNum == 1) + std::swap(N0, N1); // If it is a splat, check if the argument vector is a build_vector with // all scalar elements the same. - if (cast(N)->isSplat()) { + if (isSplat) { SDNode *V = N0.getNode(); - // If this is a bit convert that changes the element type of the vector but // not the number of vector elements, look through it. Be careful not to @@ -5344,7 +5397,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (V->getOpcode() == ISD::BUILD_VECTOR) { unsigned NumElems = V->getNumOperands(); - unsigned BaseIdx = cast(N)->getSplatIndex(); if (NumElems > BaseIdx) { SDValue Base; bool AllSame = true; @@ -5369,6 +5421,38 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } } + + // If it is a unary or the LHS and the RHS are the same node, turn the RHS + // into an undef. + if (isUnary || N0 == N1) { + // Check the SHUFFLE mask, mapping any inputs from the 2nd operand into the + // first operand. + SmallVector MappedOps; + + for (unsigned i = 0; i != NumElts; ++i) { + if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF || + cast(ShufMask.getOperand(i))->getZExtValue() < + NumElts) { + MappedOps.push_back(ShufMask.getOperand(i)); + } else { + unsigned NewIdx = + cast(ShufMask.getOperand(i))->getZExtValue() - + NumElts; + MappedOps.push_back(DAG.getConstant(NewIdx, + ShufMask.getOperand(i).getValueType())); + } + } + + ShufMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + ShufMask.getValueType(), + &MappedOps[0], MappedOps.size()); + AddToWorkList(ShufMask.getNode()); + return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(), + N->getValueType(0), N0, + DAG.getUNDEF(N->getValueType(0)), + ShufMask); + } + return SDValue(); } @@ -5377,42 +5461,52 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { - MVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); if (N->getOpcode() == ISD::AND) { if (RHS.getOpcode() == ISD::BIT_CONVERT) RHS = RHS.getOperand(0); if (RHS.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector Indices; - unsigned NumElts = RHS.getNumOperands(); + std::vector IdxOps; + unsigned NumOps = RHS.getNumOperands(); + unsigned NumElts = NumOps; for (unsigned i = 0; i != NumElts; ++i) { SDValue Elt = RHS.getOperand(i); if (!isa(Elt)) return SDValue(); else if (cast(Elt)->isAllOnesValue()) - Indices.push_back(i); + IdxOps.push_back(DAG.getIntPtrConstant(i)); else if (cast(Elt)->isNullValue()) - Indices.push_back(NumElts); + IdxOps.push_back(DAG.getIntPtrConstant(NumElts)); else return SDValue(); } // Let's see if the target supports this vector_shuffle. - MVT RVT = RHS.getValueType(); - if (!TLI.isVectorClearMaskLegal(&Indices[0], RVT)) + if (!TLI.isVectorClearMaskLegal(IdxOps, TLI.getPointerTy(), DAG)) return SDValue(); // Return the new VECTOR_SHUFFLE node. - MVT EVT = RVT.getVectorElementType(); - SmallVector ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, EVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - RVT, &ZeroOps[0], ZeroOps.size()); - LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS); - SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf); + MVT EVT = RHS.getValueType().getVectorElementType(); + MVT VT = MVT::getVectorVT(EVT, NumElts); + MVT MaskVT = MVT::getVectorVT(TLI.getPointerTy(), NumElts); + std::vector Ops; + LHS = DAG.getNode(ISD::BIT_CONVERT, LHS.getDebugLoc(), VT, LHS); + Ops.push_back(LHS); + AddToWorkList(LHS.getNode()); + std::vector ZeroOps(NumElts, DAG.getConstant(0, EVT)); + Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + VT, &ZeroOps[0], ZeroOps.size())); + Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + MaskVT, &IdxOps[0], IdxOps.size())); + SDValue Result = DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(), + VT, &Ops[0], Ops.size()); + + if (VT != N->getValueType(0)) + Result = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + N->getValueType(0), Result); + + return Result; } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index fed2b02ea57..5ea1ce34307 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -267,10 +267,16 @@ private: bool isVolatile, SDValue ValOp, unsigned StWidth, DebugLoc dl); - /// promoteShuffle - Promote a shuffle mask of a vector VT to perform the - /// same shuffle on a vector of NVT. Must not create an illegal shuffle mask. - SDValue promoteShuffle(MVT NVT, MVT VT, DebugLoc dl, SDValue N1, SDValue N2, - const int *Mask) const; + /// isShuffleLegal - Return non-null if a vector shuffle is legal with the + /// specified mask and type. Targets can specify exactly which masks they + /// support and the code generator is tasked with not creating illegal masks. + /// + /// Note that this will also return true for shuffles that are promoted to a + /// different type. + /// + /// If this is a legal shuffle, this method returns the (possibly promoted) + /// build_vector Mask. If it's not a legal shuffle, it returns null. + SDNode *isShuffleLegal(MVT VT, SDValue Mask) const; bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, SmallPtrSet &NodesLeadingTo); @@ -313,35 +319,50 @@ private: }; } -/// promoteShuffle - Promote a shuffle mask of a vector VT to perform the -/// same shuffle on a vector of NVT. Must not create an illegal shuffle mask. -/// e.g. <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> -SDValue SelectionDAGLegalize::promoteShuffle(MVT NVT, MVT VT, DebugLoc dl, - SDValue N1, SDValue N2, - const int *Mask) const { - MVT EltVT = NVT.getVectorElementType(); - int NumMaskElts = VT.getVectorNumElements(); - int NumDestElts = NVT.getVectorNumElements(); - unsigned NumEltsGrowth = NumDestElts / NumMaskElts; +/// isVectorShuffleLegal - Return true if a vector shuffle is legal with the +/// specified mask and type. Targets can specify exactly which masks they +/// support and the code generator is tasked with not creating illegal masks. +/// +/// Note that this will also return true for shuffles that are promoted to a +/// different type. +SDNode *SelectionDAGLegalize::isShuffleLegal(MVT VT, SDValue Mask) const { + switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, VT)) { + default: return 0; + case TargetLowering::Legal: + case TargetLowering::Custom: + break; + case TargetLowering::Promote: { + // If this is promoted to a different type, convert the shuffle mask and + // ask if it is legal in the promoted type! + MVT NVT = TLI.getTypeToPromoteTo(ISD::VECTOR_SHUFFLE, VT); + MVT EltVT = NVT.getVectorElementType(); - assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!"); - - if (NumEltsGrowth == 1) - return DAG.getVectorShuffle(NVT, dl, N1, N2, Mask); - - SmallVector NewMask; - for (int i = 0; i != NumMaskElts; ++i) { - int Idx = Mask[i]; - for (unsigned j = 0; j != NumEltsGrowth; ++j) { - if (Idx < 0) - NewMask.push_back(-1); - else - NewMask.push_back(Idx * NumEltsGrowth + j); + // If we changed # elements, change the shuffle mask. + unsigned NumEltsGrowth = + NVT.getVectorNumElements() / VT.getVectorNumElements(); + assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!"); + if (NumEltsGrowth > 1) { + // Renumber the elements. + SmallVector Ops; + for (unsigned i = 0, e = Mask.getNumOperands(); i != e; ++i) { + SDValue InOp = Mask.getOperand(i); + for (unsigned j = 0; j != NumEltsGrowth; ++j) { + if (InOp.getOpcode() == ISD::UNDEF) + Ops.push_back(DAG.getUNDEF(EltVT)); + else { + unsigned InEltNo = cast(InOp)->getZExtValue(); + Ops.push_back(DAG.getConstant(InEltNo*NumEltsGrowth+j, EltVT)); + } + } + } + Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getDebugLoc(), + NVT, &Ops[0], Ops.size()); } + VT = NVT; + break; } - assert((int)NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?"); - assert(TLI.isShuffleMaskLegal(&Mask[0], NVT) && "Shuffle not legal?"); - return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); + } + return TLI.isShuffleMaskLegal(Mask, VT) ? Mask.getNode() : 0; } SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, @@ -1631,15 +1652,25 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1.getValueType(), Tmp2); unsigned NumElts = Tmp1.getValueType().getVectorNumElements(); + MVT ShufMaskVT = + MVT::getIntVectorWithNumElements(NumElts); + MVT ShufMaskEltVT = ShufMaskVT.getVectorElementType(); + // We generate a shuffle of InVec and ScVec, so the shuffle mask // should be 0,1,2,3,4,5... with the appropriate element replaced with // elt 0 of the RHS. - SmallVector ShufOps; - for (unsigned i = 0; i != NumElts; ++i) - ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts); - - Result = DAG.getVectorShuffle(Tmp1.getValueType(), dl, Tmp1, ScVec, - &ShufOps[0]); + SmallVector ShufOps; + for (unsigned i = 0; i != NumElts; ++i) { + if (i != InsertPos->getZExtValue()) + ShufOps.push_back(DAG.getConstant(i, ShufMaskEltVT)); + else + ShufOps.push_back(DAG.getConstant(NumElts, ShufMaskEltVT)); + } + SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, ShufMaskVT, + &ShufOps[0], ShufOps.size()); + + Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Tmp1.getValueType(), + Tmp1, ScVec, ShufMask); Result = LegalizeOp(Result); break; } @@ -1677,14 +1708,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::VECTOR_SHUFFLE: Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the input vectors, Tmp2 = LegalizeOp(Node->getOperand(1)); // but not the shuffle mask. - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2)); // Allow targets to custom lower the SHUFFLEs they support. switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, Result.getValueType())){ default: assert(0 && "Unknown operation action!"); case TargetLowering::Legal: - assert(TLI.isShuffleMaskLegal(cast(Node)->getMask(), - Result.getValueType()) && + assert(isShuffleLegal(Result.getValueType(), Node->getOperand(2)) && "vector shuffle should not be created if not legal!"); break; case TargetLowering::Custom: @@ -1698,21 +1728,23 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { MVT VT = Node->getValueType(0); MVT EltVT = VT.getVectorElementType(); MVT PtrVT = TLI.getPointerTy(); - const int *Mask = cast(Node)->getMask(); - int NumElems = VT.getVectorNumElements(); + SDValue Mask = Node->getOperand(2); + unsigned NumElems = Mask.getNumOperands(); SmallVector Ops; - for (int i = 0; i != NumElems; ++i) { - if (Mask[i] < 0) { + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) { Ops.push_back(DAG.getUNDEF(EltVT)); - continue; + } else { + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Idx = cast(Arg)->getZExtValue(); + if (Idx < NumElems) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp1, + DAG.getConstant(Idx, PtrVT))); + else + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp2, + DAG.getConstant(Idx - NumElems, PtrVT))); } - int Idx = Mask[i]; - if (Idx < NumElems) - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp1, - DAG.getConstant(Idx, PtrVT))); - else - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp2, - DAG.getConstant(Idx - NumElems, PtrVT))); } Result = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); break; @@ -1727,8 +1759,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Tmp2); // Convert the shuffle mask to the right # elements. - Result = promoteShuffle(NVT, OVT, dl, Tmp1, Tmp2, - cast(Node)->getMask()); + Tmp3 = SDValue(isShuffleLegal(OVT, Node->getOperand(2)), 0); + assert(Tmp3.getNode() && "Shuffle not legal?"); + Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NVT, Tmp1, Tmp2, Tmp3); Result = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Result); break; } @@ -5457,7 +5490,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // FIXME: it would be far nicer to change this into map // and use a bitmask instead of a list of elements. - // FIXME: this doesn't treat <0, u, 0, u> for example, as a splat. std::map > Values; Values[SplatValue].push_back(0); bool isConstant = true; @@ -5514,17 +5546,21 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { if (SplatValue.getNode()) { // Splat of one value? // Build the shuffle constant vector: <0, 0, 0, 0> - SmallVector ZeroVec(NumElems, 0); + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + SDValue Zero = DAG.getConstant(0, MaskVT.getVectorElementType()); + std::vector ZeroVec(NumElems, Zero); + SDValue SplatMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &ZeroVec[0], ZeroVec.size()); // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it. - if (TLI.isShuffleMaskLegal(&ZeroVec[0], Node->getValueType(0))) { + if (isShuffleLegal(VT, SplatMask)) { // Get the splatted value into the low element of a vector register. SDValue LowValVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SplatValue); // Return shuffle(LowValVec, undef, <0,0,0,0>) - return DAG.getVectorShuffle(VT, dl, LowValVec, DAG.getUNDEF(VT), - &ZeroVec[0]); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, LowValVec, + DAG.getUNDEF(VT), SplatMask); } } @@ -5546,25 +5582,35 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { std::swap(Val1, Val2); // Build the shuffle constant vector: e.g. <0, 4, 0, 4> - SmallVector ShuffleMask(NumElems, -1); + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT MaskEltVT = MaskVT.getVectorElementType(); + std::vector MaskVec(NumElems); // Set elements of the shuffle mask for Val1. std::vector &Val1Elts = Values[Val1]; for (unsigned i = 0, e = Val1Elts.size(); i != e; ++i) - ShuffleMask[Val1Elts[i]] = 0; + MaskVec[Val1Elts[i]] = DAG.getConstant(0, MaskEltVT); // Set elements of the shuffle mask for Val2. std::vector &Val2Elts = Values[Val2]; for (unsigned i = 0, e = Val2Elts.size(); i != e; ++i) if (Val2.getOpcode() != ISD::UNDEF) - ShuffleMask[Val2Elts[i]] = NumElems; + MaskVec[Val2Elts[i]] = DAG.getConstant(NumElems, MaskEltVT); + else + MaskVec[Val2Elts[i]] = DAG.getUNDEF(MaskEltVT); + + SDValue ShuffleMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); // If the target supports SCALAR_TO_VECTOR and this shuffle mask, use it. if (TLI.isOperationLegalOrCustom(ISD::SCALAR_TO_VECTOR, VT) && - TLI.isShuffleMaskLegal(&ShuffleMask[0], VT)) { + isShuffleLegal(VT, ShuffleMask)) { Val1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val1); Val2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val2); - return DAG.getVectorShuffle(VT, dl, Val1, Val2, &ShuffleMask[0]); + SDValue Ops[] = { Val1, Val2, ShuffleMask }; + + // Return shuffle(LoValVec, HiValVec, <0,1,0,1>) + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, Ops, 3); } } @@ -8020,19 +8066,36 @@ SDValue SelectionDAGLegalize::WidenVectorOp(SDValue Op, MVT WidenVT) { case ISD::VECTOR_SHUFFLE: { SDValue Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT); SDValue Tmp2 = WidenVectorOp(Node->getOperand(1), WidenVT); - const int *Mask = cast(Node)->getMask(); - SmallVector NewMask; + // VECTOR_SHUFFLE 3rd operand must be a constant build vector that is + // used as permutation array. We build the vector here instead of widening + // because we don't want to legalize and have it turned to something else. + SDValue PermOp = Node->getOperand(2); + SDValueVector NewOps; + MVT PVT = PermOp.getValueType().getVectorElementType(); for (unsigned i = 0; i < NumElts; ++i) { - int Idx = Mask[i]; - if (Idx < (int)NumElts) - NewMask.push_back(Idx); - else - NewMask.push_back(Idx + NewNumElts - NumElts); + if (PermOp.getOperand(i).getOpcode() == ISD::UNDEF) { + NewOps.push_back(PermOp.getOperand(i)); + } else { + unsigned Idx = + cast(PermOp.getOperand(i))->getZExtValue(); + if (Idx < NumElts) { + NewOps.push_back(PermOp.getOperand(i)); + } + else { + NewOps.push_back(DAG.getConstant(Idx + NewNumElts - NumElts, + PermOp.getOperand(i).getValueType())); + } + } } - for (unsigned i = NumElts; i < NewNumElts; ++i) - NewMask.push_back(-1); - - Result = DAG.getVectorShuffle(WidenVT, dl, Tmp1, Tmp2, &NewMask[0]); + for (unsigned i = NumElts; i < NewNumElts; ++i) { + NewOps.push_back(DAG.getUNDEF(PVT)); + } + + SDValue Tmp3 = DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::getVectorVT(PVT, NewOps.size()), + &NewOps[0], NewOps.size()); + + Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT, Tmp1, Tmp2, Tmp3); break; } case ISD::LOAD: { diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 116f3004203..eec328f52ff 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -772,8 +772,10 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo, // If Lo or Hi uses elements from at most two of the four input vectors, then // express it as a vector shuffle of those two inputs. Otherwise extract the // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. - const int *Mask = cast(N)->getMask(); - SmallVector Ops; + SDValue Mask = N->getOperand(2); + MVT IdxVT = Mask.getValueType().getVectorElementType(); + SmallVector Ops; + Ops.reserve(NewElts); for (unsigned High = 0; High < 2; ++High) { SDValue &Output = High ? Hi : Lo; @@ -785,15 +787,18 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo, unsigned FirstMaskIdx = High * NewElts; bool useBuildVector = false; for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset); + // The mask element. This indexes into the input. - int Idx = Mask[FirstMaskIdx + MaskOffset]; + unsigned Idx = Arg.getOpcode() == ISD::UNDEF ? + -1U : cast(Arg)->getZExtValue(); // The input vector this mask element indexes into. - unsigned Input = (unsigned)Idx / NewElts; + unsigned Input = Idx / NewElts; if (Input >= array_lengthof(Inputs)) { // The mask element does not index into any input vector. - Ops.push_back(-1); + Ops.push_back(DAG.getUNDEF(IdxVT)); continue; } @@ -821,24 +826,27 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo, } // Add the mask index for the new shuffle vector. - Ops.push_back(Idx + OpNo * NewElts); + Ops.push_back(DAG.getConstant(Idx + OpNo * NewElts, IdxVT)); } if (useBuildVector) { MVT EltVT = NewVT.getVectorElementType(); - SmallVector SVOps; + Ops.clear(); // Extract the input elements by hand. for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset); + // The mask element. This indexes into the input. - int Idx = Mask[FirstMaskIdx + MaskOffset]; + unsigned Idx = Arg.getOpcode() == ISD::UNDEF ? + -1U : cast(Arg)->getZExtValue(); // The input vector this mask element indexes into. - unsigned Input = (unsigned)Idx / NewElts; + unsigned Input = Idx / NewElts; if (Input >= array_lengthof(Inputs)) { // The mask element is "undef" or indexes off the end of the input. - SVOps.push_back(DAG.getUNDEF(EltVT)); + Ops.push_back(DAG.getUNDEF(EltVT)); continue; } @@ -846,22 +854,25 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo, Idx -= Input * NewElts; // Extract the vector element by hand. - SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Inputs[Input], DAG.getIntPtrConstant(Idx))); + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Inputs[Input], DAG.getIntPtrConstant(Idx))); } // Construct the Lo/Hi output using a BUILD_VECTOR. - Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size()); + Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, &Ops[0], Ops.size()); } else if (InputUsed[0] == -1U) { // No input vectors were used! The result is undefined. Output = DAG.getUNDEF(NewVT); } else { + // At least one input vector was used. Create a new shuffle vector. + SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::getVectorVT(IdxVT, Ops.size()), + &Ops[0], Ops.size()); SDValue Op0 = Inputs[InputUsed[0]]; // If only one input was used, use an undefined vector for the other. SDValue Op1 = InputUsed[1] == -1U ? DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; - // At least one input vector was used. Create a new shuffle vector. - Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]); + Output = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NewVT, Op0, Op1, NewMask); } Ops.clear(); @@ -1462,15 +1473,18 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { if (NumOperands == 2) { // Replace concat of two operands with a shuffle. - SmallVector MaskOps(WidenNumElts); + MVT PtrVT = TLI.getPointerTy(); + SmallVector MaskOps(WidenNumElts); for (unsigned i=0; i < WidenNumElts/2; ++i) { - MaskOps[i] = i; - MaskOps[i+WidenNumElts/2] = i+WidenNumElts; + MaskOps[i] = DAG.getConstant(i, PtrVT); + MaskOps[i+WidenNumElts/2] = DAG.getConstant(i+WidenNumElts, PtrVT); } - return DAG.getVectorShuffle(WidenVT, dl, - GetWidenedVector(N->getOperand(0)), - GetWidenedVector(N->getOperand(1)), - &MaskOps[0]); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::getVectorVT(PtrVT, WidenNumElts), + &MaskOps[0], WidenNumElts); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT, + GetWidenedVector(N->getOperand(0)), + GetWidenedVector(N->getOperand(1)), Mask); } } } @@ -1748,7 +1762,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(SDNode *N) { MVT VT = N->getValueType(0); - int NumElts = VT.getVectorNumElements(); + unsigned NumElts = VT.getVectorNumElements(); DebugLoc dl = N->getDebugLoc(); MVT WidenVT = TLI.getTypeToTransformTo(VT); @@ -1758,17 +1772,28 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(SDNode *N) { SDValue InOp2 = GetWidenedVector(N->getOperand(1)); // Adjust mask based on new input vector length. - const int *Mask = cast(N)->getMask(); - SmallVector NewMask; - for (int i = 0; i < NumElts; ++i) { - if (Mask[i] < NumElts) - NewMask.push_back(Mask[i]); - else - NewMask.push_back(Mask[i] - NumElts + WidenNumElts); + SDValue Mask = N->getOperand(2); + SmallVector MaskOps(WidenNumElts); + MVT IdxVT = Mask.getValueType().getVectorElementType(); + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) + MaskOps[i] = Arg; + else { + unsigned Idx = cast(Arg)->getZExtValue(); + if (Idx < NumElts) + MaskOps[i] = Arg; + else + MaskOps[i] = DAG.getConstant(Idx - NumElts + WidenNumElts, IdxVT); + } } for (unsigned i = NumElts; i < WidenNumElts; ++i) - NewMask.push_back(-1); - return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]); + MaskOps[i] = DAG.getUNDEF(IdxVT); + SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::getVectorVT(IdxVT, WidenNumElts), + &MaskOps[0], WidenNumElts); + + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT, InOp1, InOp2, NewMask); } SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 11d12eb0263..edd985ed1ad 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -456,13 +456,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(AT->getRawSubclassData()); break; } - case ISD::VECTOR_SHUFFLE: { - const int *Mask = cast(N)->getMask(); - for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); - i != e; ++i) - ID.AddInteger(Mask[i]); - break; - } } // end switch (N->getOpcode()) } @@ -769,6 +762,12 @@ void SelectionDAG::VerifyNode(SDNode *N) { assert(N->getValueType(0).isVector() && "Wrong return type!"); assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && "Wrong number of operands!"); + MVT EltVT = N->getValueType(0).getVectorElementType(); + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) + assert((I->getValueType() == EltVT || + (EltVT.isInteger() && I->getValueType().isInteger() && + EltVT.bitsLE(I->getValueType()))) && + "Wrong operand type!"); break; } } @@ -1127,110 +1126,6 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { return SDValue(CondCodeNodes[Cond], 0); } -static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl &M) { - std::swap(N1, N2); - int NElts = M.size(); - for (int i = 0; i != NElts; ++i) { - if (M[i] >= NElts) - M[i] -= NElts; - else if (M[i] >= 0) - M[i] += NElts; - } -} - -SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, - SDValue N2, const int *Mask) { - assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); - assert(VT.isVector() && N1.getValueType().isVector() && - "Vector Shuffle VTs must be a vectors"); - assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() - && "Vector Shuffle VTs must have same element type"); - - // Canonicalize shuffle undef, undef -> undef - if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) - return N1; - - // Validate that all the indices past in in Mask are within the range of - // elements input to the shuffle. - int NElts = VT.getVectorNumElements(); - SmallVector MaskVec; - for (int i = 0; i != NElts; ++i) { - if (Mask[i] >= (NElts * 2)) { - assert(0 && "Index out of range"); - return SDValue(); - } - MaskVec.push_back(Mask[i]); - } - - // Canonicalize shuffle v, v -> v, undef - if (N1 == N2) { - N2 = getUNDEF(VT); - for (int i = 0; i != NElts; ++i) - if (MaskVec[i] >= NElts) MaskVec[i] -= NElts; - } - - // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. - if (N1.getOpcode() == ISD::UNDEF) - commuteShuffle(N1, N2, MaskVec); - - // Canonicalize all index into lhs, -> shuffle lhs, undef - // Canonicalize all index into rhs, -> shuffle rhs, undef - bool AllLHS = true, AllRHS = true; - bool N2Undef = N2.getOpcode() == ISD::UNDEF; - for (int i = 0; i != NElts; ++i) { - if (MaskVec[i] >= NElts) { - if (N2Undef) - MaskVec[i] = -1; - else - AllLHS = false; - } else if (MaskVec[i] >= 0) { - AllRHS = false; - } - } - if (AllLHS && AllRHS) - return getUNDEF(VT); - if (AllLHS) - N2 = getUNDEF(VT); - if (AllRHS) { - N1 = getUNDEF(VT); - commuteShuffle(N1, N2, MaskVec); - } - - // If Identity shuffle, or all shuffle in to undef, return that node. - bool AllUndef = true; - bool Identity = true; - for (int i = 0; i < NElts; ++i) { - if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false; - if (MaskVec[i] >= 0) AllUndef = false; - } - if (Identity) - return N1; - if (AllUndef) - return getUNDEF(VT); - - FoldingSetNodeID ID; - SDValue Ops[2] = { N1, N2 }; - AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2); - for (int i = 0; i != NElts; ++i) - ID.AddInteger(MaskVec[i]); - - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) - return SDValue(E, 0); - - // Allocate the mask array for the node out of the BumpPtrAllocator, since - // SDNode doesn't have access to it. This memory will be "leaked" when - // the node is deallocated, but recovered when the NodeAllocator is released. - int *MaskAlloc = OperandAllocator.Allocate(NElts); - memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); - - ShuffleVectorSDNode *N = NodeAllocator.Allocate(); - new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); -} - SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl, SDValue Val, SDValue DTy, SDValue STy, SDValue Rnd, SDValue Sat, @@ -2192,18 +2087,19 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { SDValue SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned i) { MVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); - const int *PermMask = cast(N)->getMask(); - if (PermMask[i] < 0) + SDValue PermMask = N->getOperand(2); + SDValue Idx = PermMask.getOperand(i); + if (Idx.getOpcode() == ISD::UNDEF) return getUNDEF(VT.getVectorElementType()); - int Index = PermMask[i]; - int NumElems = VT.getVectorNumElements(); + unsigned Index = cast(Idx)->getZExtValue(); + unsigned NumElems = PermMask.getNumOperands(); SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1); Index %= NumElems; if (V.getOpcode() == ISD::BIT_CONVERT) { V = V.getOperand(0); MVT VVT = V.getValueType(); - if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems) + if (!VVT.isVector() || VVT.getVectorNumElements() != NumElems) return SDValue(); } if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) @@ -2897,7 +2793,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, } break; case ISD::VECTOR_SHUFFLE: - assert(0 && "should use getVectorShuffle constructor!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType().isVector() && + VT.isVector() && N3.getValueType().isVector() && + N3.getOpcode() == ISD::BUILD_VECTOR && + VT.getVectorNumElements() == N3.getNumOperands() && + "Illegal VECTOR_SHUFFLE node!"); break; case ISD::BIT_CONVERT: // Fold bit_convert nodes from a type to themselves. @@ -5421,14 +5322,14 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) { - const int *Mask = cast(this)->getMask(); + SDNode *Mask = getOperand(2).getNode(); OS << "<"; - for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + for (unsigned i = 0, e = Mask->getNumOperands(); i != e; ++i) { if (i) OS << ","; - if (Mask[i] < 0) + if (Mask->getOperand(i).getOpcode() == ISD::UNDEF) OS << "u"; else - OS << Mask[i]; + OS << cast(Mask->getOperand(i))->getZExtValue(); } OS << ">"; } @@ -5709,13 +5610,3 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, SplatBitSize = sz; return true; } - -bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) { - int Idx = -1; - for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { - if (Idx < 0) Idx = Mask[i]; - if (Mask[i] >= 0 && Mask[i] != Idx) - return false; - } - return true; -} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index aac4b655db5..3e6da24a4a1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -870,7 +870,8 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { if (ConstantFP *CFP = dyn_cast(C)) return N = DAG.getConstantFP(*CFP, VT); - if (isa(C) && !V->getType()->isAggregateType()) + if (isa(C) && !isa(V->getType()) && + !V->getType()->isAggregateType()) return N = DAG.getUNDEF(VT); if (ConstantExpr *CE = dyn_cast(C)) { @@ -924,11 +925,14 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { for (unsigned i = 0; i != NumElements; ++i) Ops.push_back(getValue(CP->getOperand(i))); } else { - assert(isa(C) && "Unknown vector constant!"); + assert((isa(C) || isa(C)) && + "Unknown vector constant!"); MVT EltVT = TLI.getValueType(VecTy->getElementType()); SDValue Op; - if (EltVT.isFloatingPoint()) + if (isa(C)) + Op = DAG.getUNDEF(EltVT); + else if (EltVT.isFloatingPoint()) Op = DAG.getConstantFP(0, EltVT); else Op = DAG.getConstant(0, EltVT); @@ -2431,42 +2435,37 @@ void SelectionDAGLowering::visitExtractElement(User &I) { // Utility for visitShuffleVector - Returns true if the mask is mask starting // from SIndx and increasing to the element length (undefs are allowed). -static bool SequentialMask(SmallVectorImpl &Mask, int SIndx) { - int MaskNumElts = Mask.size(); - for (int i = 0; i != MaskNumElts; ++i) - if ((Mask[i] >= 0) && (Mask[i] != i + SIndx)) - return false; +static bool SequentialMask(SDValue Mask, unsigned SIndx) { + unsigned MaskNumElts = Mask.getNumOperands(); + for (unsigned i = 0; i != MaskNumElts; ++i) { + if (Mask.getOperand(i).getOpcode() != ISD::UNDEF) { + unsigned Idx = cast(Mask.getOperand(i))->getZExtValue(); + if (Idx != i + SIndx) + return false; + } + } return true; } void SelectionDAGLowering::visitShuffleVector(User &I) { - SmallVector Mask; SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); + SDValue Mask = getValue(I.getOperand(2)); - // Convert the ConstantVector mask operand into an array of ints, with -1 - // representing undef values. - SmallVector MaskElts; - cast(I.getOperand(2))->getVectorElements(MaskElts); - int MaskNumElts = MaskElts.size(); - for (int i = 0; i != MaskNumElts; ++i) { - if (isa(MaskElts[i])) - Mask.push_back(-1); - else - Mask.push_back(cast(MaskElts[i])->getSExtValue()); - } - MVT VT = TLI.getValueType(I.getType()); MVT SrcVT = Src1.getValueType(); + int MaskNumElts = Mask.getNumOperands(); int SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { - setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, - &Mask[0])); + setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(), + VT, Src1, Src2, Mask)); return; } // Normalize the shuffle vector since mask and vector length don't match. + MVT MaskEltVT = Mask.getValueType().getVectorElementType(); + if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { // Mask is longer than the source vectors and is a multiple of the source // vectors. We can use concatenate vector to make the mask and vectors @@ -2480,33 +2479,44 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { // Pad both vectors with undefs to make them the same length as the mask. unsigned NumConcat = MaskNumElts / SrcNumElts; - bool Src1U = Src1.getOpcode() == ISD::UNDEF; - bool Src2U = Src2.getOpcode() == ISD::UNDEF; SDValue UndefVal = DAG.getUNDEF(SrcVT); - SmallVector MOps1(NumConcat, UndefVal); - SmallVector MOps2(NumConcat, UndefVal); + SDValue* MOps1 = new SDValue[NumConcat]; + SDValue* MOps2 = new SDValue[NumConcat]; MOps1[0] = Src1; MOps2[0] = Src2; - - Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurDebugLoc(), VT, - &MOps1[0], NumConcat); - Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurDebugLoc(), VT, - &MOps2[0], NumConcat); + for (unsigned i = 1; i != NumConcat; ++i) { + MOps1[i] = UndefVal; + MOps2[i] = UndefVal; + } + Src1 = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, MOps1, NumConcat); + Src2 = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, MOps2, NumConcat); + + delete [] MOps1; + delete [] MOps2; // Readjust mask for new input vector length. - SmallVector MappedOps; + SmallVector MappedOps; for (int i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; - if (Idx < SrcNumElts) - MappedOps.push_back(Idx); - else - MappedOps.push_back(Idx + MaskNumElts - SrcNumElts); + if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) { + MappedOps.push_back(Mask.getOperand(i)); + } else { + int Idx = cast(Mask.getOperand(i))->getZExtValue(); + if (Idx < SrcNumElts) + MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT)); + else + MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts - SrcNumElts, + MaskEltVT)); + } } - setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, - &MappedOps[0])); + Mask = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + Mask.getValueType(), + &MappedOps[0], MappedOps.size()); + + setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(), + VT, Src1, Src2, Mask)); return; } @@ -2531,19 +2541,20 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { int MaxRange[2] = {-1, -1}; for (int i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; - int Input = 0; - if (Idx < 0) - continue; - - if (Idx >= SrcNumElts) { - Input = 1; - Idx -= SrcNumElts; + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() != ISD::UNDEF) { + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + int Idx = cast(Arg)->getZExtValue(); + int Input = 0; + if (Idx >= SrcNumElts) { + Input = 1; + Idx -= SrcNumElts; + } + if (Idx > MaxRange[Input]) + MaxRange[Input] = Idx; + if (Idx < MinRange[Input]) + MinRange[Input] = Idx; } - if (Idx > MaxRange[Input]) - MaxRange[Input] = Idx; - if (Idx < MinRange[Input]) - MinRange[Input] = Idx; } // Check if the access is smaller than the vector size and can we find @@ -2585,18 +2596,26 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { } } // Calculate new mask. - SmallVector MappedOps; + SmallVector MappedOps; for (int i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; - if (Idx < 0) - MappedOps.push_back(Idx); - else if (Idx < SrcNumElts) - MappedOps.push_back(Idx - StartIdx[0]); - else - MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) { + MappedOps.push_back(Arg); + } else { + int Idx = cast(Arg)->getZExtValue(); + if (Idx < SrcNumElts) + MappedOps.push_back(DAG.getConstant(Idx - StartIdx[0], MaskEltVT)); + else { + Idx = Idx - SrcNumElts - StartIdx[1] + MaskNumElts; + MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT)); + } + } } - setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, - &MappedOps[0])); + Mask = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + Mask.getValueType(), + &MappedOps[0], MappedOps.size()); + setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(), + VT, Src1, Src2, Mask)); return; } } @@ -2608,10 +2627,12 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { MVT PtrVT = TLI.getPointerTy(); SmallVector Ops; for (int i = 0; i != MaskNumElts; ++i) { - if (Mask[i] < 0) { + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) { Ops.push_back(DAG.getUNDEF(EltVT)); } else { - int Idx = Mask[i]; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + int Idx = cast(Arg)->getZExtValue(); if (Idx < SrcNumElts) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), EltVT, Src1, DAG.getConstant(Idx, PtrVT))); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 87de2c7cf47..c07e6d5645c 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -1672,7 +1672,7 @@ SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - const int *PermMask = cast(Op)->getMask(); + SDValue PermMask = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); if (V2.getOpcode() == ISD::UNDEF) V2 = V1; @@ -1703,40 +1703,39 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { } else assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE"); - for (unsigned i = 0; i != MaxElts; ++i) { - if (PermMask[i] < 0) - continue; - - unsigned SrcElt = PermMask[i]; + for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) { + if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) { + unsigned SrcElt = cast (PermMask.getOperand(i))->getZExtValue(); - if (monotonic) { - if (SrcElt >= V2EltIdx0) { - if (1 >= (++EltsFromV2)) { - V2Elt = (V2EltIdx0 - SrcElt) << 2; + if (monotonic) { + if (SrcElt >= V2EltIdx0) { + if (1 >= (++EltsFromV2)) { + V2Elt = (V2EltIdx0 - SrcElt) << 2; + } + } else if (CurrElt != SrcElt) { + monotonic = false; } - } else if (CurrElt != SrcElt) { - monotonic = false; + + ++CurrElt; } - ++CurrElt; - } - - if (rotate) { - if (PrevElt > 0 && SrcElt < MaxElts) { - if ((PrevElt == SrcElt - 1) - || (PrevElt == MaxElts - 1 && SrcElt == 0)) { + if (rotate) { + if (PrevElt > 0 && SrcElt < MaxElts) { + if ((PrevElt == SrcElt - 1) + || (PrevElt == MaxElts - 1 && SrcElt == 0)) { + PrevElt = SrcElt; + if (SrcElt == 0) + V0Elt = i; + } else { + rotate = false; + } + } else if (PrevElt == 0) { + // First time through, need to keep track of previous element PrevElt = SrcElt; - if (SrcElt == 0) - V0Elt = i; } else { + // This isn't a rotation, takes elements from vector 2 rotate = false; } - } else if (PrevElt == 0) { - // First time through, need to keep track of previous element - PrevElt = SrcElt; - } else { - // This isn't a rotation, takes elements from vector 2 - rotate = false; } } } @@ -1769,8 +1768,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { unsigned BytesPerElement = EltVT.getSizeInBits()/8; SmallVector ResultMask; - for (unsigned i = 0, e = MaxElts; i != e; ++i) { - unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; + for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { + unsigned SrcElt; + if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) + SrcElt = 0; + else + SrcElt = cast(PermMask.getOperand(i))->getZExtValue(); for (unsigned j = 0; j < BytesPerElement; ++j) { ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index cb36b05294b..32ff8f46cf4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -456,22 +456,22 @@ static bool isFloatingPointZero(SDValue Op) { /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return /// true if Op is undef or if it matches the specified value. -static bool isConstantOrUndef(int Op, int Val) { - return Op < 0 || Op == Val; +static bool isConstantOrUndef(SDValue Op, unsigned Val) { + return Op.getOpcode() == ISD::UNDEF || + cast(Op)->getZExtValue() == Val; } /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. -bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { - const int *Mask = N->getMask(); +bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) { if (!isUnary) { for (unsigned i = 0; i != 16; ++i) - if (!isConstantOrUndef(Mask[i], i*2+1)) + if (!isConstantOrUndef(N->getOperand(i), i*2+1)) return false; } else { for (unsigned i = 0; i != 8; ++i) - if (!isConstantOrUndef(Mask[i], i*2+1) || - !isConstantOrUndef(Mask[i+8], i*2+1)) + if (!isConstantOrUndef(N->getOperand(i), i*2+1) || + !isConstantOrUndef(N->getOperand(i+8), i*2+1)) return false; } return true; @@ -479,19 +479,18 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. -bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { - const int *Mask = N->getMask(); +bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) { if (!isUnary) { for (unsigned i = 0; i != 16; i += 2) - if (!isConstantOrUndef(Mask[i ], i*2+2) || - !isConstantOrUndef(Mask[i+1], i*2+3)) + if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || + !isConstantOrUndef(N->getOperand(i+1), i*2+3)) return false; } else { for (unsigned i = 0; i != 8; i += 2) - if (!isConstantOrUndef(Mask[i ], i*2+2) || - !isConstantOrUndef(Mask[i+1], i*2+3) || - !isConstantOrUndef(Mask[i+8], i*2+2) || - !isConstantOrUndef(Mask[i+9], i*2+3)) + if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || + !isConstantOrUndef(N->getOperand(i+1), i*2+3) || + !isConstantOrUndef(N->getOperand(i+8), i*2+2) || + !isConstantOrUndef(N->getOperand(i+9), i*2+3)) return false; } return true; @@ -499,29 +498,27 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { /// isVMerge - Common function, used to match vmrg* shuffles. /// -static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, +static bool isVMerge(SDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart) { - assert(N->getValueType(0) == MVT::v16i8 && - "PPC only supports shuffles by bytes!"); + assert(N->getOpcode() == ISD::BUILD_VECTOR && + N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && "Unsupported merge size!"); - const int *Mask = N->getMask(); for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit - if (!isConstantOrUndef(Mask[i*UnitSize*2+j], + if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j), LHSStart+j+i*UnitSize) || - !isConstantOrUndef(Mask[i*UnitSize*2+UnitSize+j], + !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j), RHSStart+j+i*UnitSize)) return false; } - return true; + return true; } /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). -bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary) { +bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { if (!isUnary) return isVMerge(N, UnitSize, 8, 24); return isVMerge(N, UnitSize, 8, 8); @@ -529,8 +526,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). -bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary) { +bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { if (!isUnary) return isVMerge(N, UnitSize, 0, 16); return isVMerge(N, UnitSize, 0, 0); @@ -540,92 +536,91 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { - assert(N->getValueType(0) == MVT::v16i8 && - "PPC only supports shuffles by bytes!"); - - ShuffleVectorSDNode *SVOp = cast(N); - + assert(N->getOpcode() == ISD::BUILD_VECTOR && + N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); // Find the first non-undef value in the shuffle mask. - const int *Mask = SVOp->getMask(); unsigned i; - for (i = 0; i != 16 && Mask[i] < 0; ++i) + for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i) /*search*/; if (i == 16) return -1; // all undef. - // Otherwise, check to see if the rest of the elements are consecutively + // Otherwise, check to see if the rest of the elements are consequtively // numbered from this value. - unsigned ShiftAmt = Mask[i]; + unsigned ShiftAmt = cast(N->getOperand(i))->getZExtValue(); if (ShiftAmt < i) return -1; ShiftAmt -= i; if (!isUnary) { - // Check the rest of the elements to see if they are consecutive. + // Check the rest of the elements to see if they are consequtive. for (++i; i != 16; ++i) - if (!isConstantOrUndef(Mask[i], ShiftAmt+i)) + if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i)) return -1; } else { - // Check the rest of the elements to see if they are consecutive. + // Check the rest of the elements to see if they are consequtive. for (++i; i != 16; ++i) - if (!isConstantOrUndef(Mask[i], (ShiftAmt+i) & 15)) + if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15)) return -1; } + return ShiftAmt; } /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// VSPLTB/VSPLTH/VSPLTW. -bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { - assert(N->getValueType(0) == MVT::v16i8 && +bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) { + assert(N->getOpcode() == ISD::BUILD_VECTOR && + N->getNumOperands() == 16 && (EltSize == 1 || EltSize == 2 || EltSize == 4)); // This is a splat operation if each element of the permute is the same, and // if the value doesn't reference the second vector. - const int *Mask = N->getMask(); - unsigned ElementBase = Mask[0]; - - // FIXME: Handle UNDEF elements too! - if (ElementBase >= 16) + unsigned ElementBase = 0; + SDValue Elt = N->getOperand(0); + if (ConstantSDNode *EltV = dyn_cast(Elt)) + ElementBase = EltV->getZExtValue(); + else + return false; // FIXME: Handle UNDEF elements too! + + if (cast(Elt)->getZExtValue() >= 16) return false; - // Check that the indices are consecutive, in the case of a multi-byte element - // splatted with a v16i8 mask. - for (unsigned i = 1; i != EltSize; ++i) - if (Mask[i] < 0 || Mask[i] != (int)(i+ElementBase)) + // Check that they are consequtive. + for (unsigned i = 1; i != EltSize; ++i) { + if (!isa(N->getOperand(i)) || + cast(N->getOperand(i))->getZExtValue() != i+ElementBase) return false; + } + assert(isa(Elt) && "Invalid VECTOR_SHUFFLE mask!"); for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { - if (Mask[i] < 0) continue; + if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + assert(isa(N->getOperand(i)) && + "Invalid VECTOR_SHUFFLE mask!"); for (unsigned j = 0; j != EltSize; ++j) - if (Mask[i+j] != Mask[j]) + if (N->getOperand(i+j) != N->getOperand(j)) return false; } + return true; } /// isAllNegativeZeroVector - Returns true if all elements of build_vector /// are -0.0. bool PPC::isAllNegativeZeroVector(SDNode *N) { - BuildVectorSDNode *BV = cast(N); - - APInt APVal, APUndef; - unsigned BitSize; - bool HasAnyUndefs; - - if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32)) - if (ConstantFPSDNode *CFP = dyn_cast(N->getOperand(0))) + assert(N->getOpcode() == ISD::BUILD_VECTOR); + if (PPC::isSplatShuffleMask(N, N->getNumOperands())) + if (ConstantFPSDNode *CFP = dyn_cast(N)) return CFP->getValueAPF().isNegZero(); - return false; } /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { - ShuffleVectorSDNode *SVOp = cast(N); - assert(isSplatShuffleMask(SVOp, EltSize)); - return SVOp->getMask()[0] / EltSize; + assert(isSplatShuffleMask(N, EltSize)); + return cast(N->getOperand(0))->getZExtValue() / EltSize; } /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed @@ -3154,10 +3149,11 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS); RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS); - int Ops[16]; + SDValue Ops[16]; for (unsigned i = 0; i != 16; ++i) - Ops[i] = i + Amt; - SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); + Ops[i] = DAG.getConstant(i+Amt, MVT::i8); + SDValue T = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, LHS, RHS, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops,16)); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T); } @@ -3358,7 +3354,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); - int ShufIdxs[16]; + unsigned ShufIdxs[16]; switch (OpNum) { default: assert(0 && "Unknown i32 permute!"); case OP_VMRGHW: @@ -3396,11 +3392,13 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, case OP_VSLDOI12: return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); } - MVT VT = OpLHS.getValueType(); - OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS); - OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS); - SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T); + SDValue Ops[16]; + for (unsigned i = 0; i != 16; ++i) + Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i8); + + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, OpLHS.getValueType(), + OpLHS, OpRHS, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops, 16)); } /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this @@ -3408,30 +3406,28 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, /// return the code it can be lowered into. Worst case, it can always be /// lowered into a vperm. SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - ShuffleVectorSDNode *SVOp = cast(Op); - const int *PermMask = SVOp->getMask(); - MVT VT = Op.getValueType(); + SDValue PermMask = Op.getOperand(2); // Cases that are handled by instructions that take permute immediates // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be // selected by the instruction selector. if (V2.getOpcode() == ISD::UNDEF) { - if (PPC::isSplatShuffleMask(SVOp, 1) || - PPC::isSplatShuffleMask(SVOp, 2) || - PPC::isSplatShuffleMask(SVOp, 4) || - PPC::isVPKUWUMShuffleMask(SVOp, true) || - PPC::isVPKUHUMShuffleMask(SVOp, true) || - PPC::isVSLDOIShuffleMask(SVOp, true) != -1 || - PPC::isVMRGLShuffleMask(SVOp, 1, true) || - PPC::isVMRGLShuffleMask(SVOp, 2, true) || - PPC::isVMRGLShuffleMask(SVOp, 4, true) || - PPC::isVMRGHShuffleMask(SVOp, 1, true) || - PPC::isVMRGHShuffleMask(SVOp, 2, true) || - PPC::isVMRGHShuffleMask(SVOp, 4, true)) { + if (PPC::isSplatShuffleMask(PermMask.getNode(), 1) || + PPC::isSplatShuffleMask(PermMask.getNode(), 2) || + PPC::isSplatShuffleMask(PermMask.getNode(), 4) || + PPC::isVPKUWUMShuffleMask(PermMask.getNode(), true) || + PPC::isVPKUHUMShuffleMask(PermMask.getNode(), true) || + PPC::isVSLDOIShuffleMask(PermMask.getNode(), true) != -1 || + PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, true) || + PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, true) || + PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, true) || + PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, true) || + PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, true) || + PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, true)) { return Op; } } @@ -3439,15 +3435,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // Altivec has a variety of "shuffle immediates" that take two vector inputs // and produce a fixed permutation. If any of these match, do not lower to // VPERM. - if (PPC::isVPKUWUMShuffleMask(SVOp, false) || - PPC::isVPKUHUMShuffleMask(SVOp, false) || - PPC::isVSLDOIShuffleMask(SVOp, false) != -1 || - PPC::isVMRGLShuffleMask(SVOp, 1, false) || - PPC::isVMRGLShuffleMask(SVOp, 2, false) || - PPC::isVMRGLShuffleMask(SVOp, 4, false) || - PPC::isVMRGHShuffleMask(SVOp, 1, false) || - PPC::isVMRGHShuffleMask(SVOp, 2, false) || - PPC::isVMRGHShuffleMask(SVOp, 4, false)) + if (PPC::isVPKUWUMShuffleMask(PermMask.getNode(), false) || + PPC::isVPKUHUMShuffleMask(PermMask.getNode(), false) || + PPC::isVSLDOIShuffleMask(PermMask.getNode(), false) != -1 || + PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, false) || + PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, false) || + PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, false) || + PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, false) || + PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, false) || + PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, false)) return Op; // Check to see if this is a shuffle of 4-byte values. If so, we can use our @@ -3457,10 +3453,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number unsigned EltNo = 8; // Start out undef. for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. - if (PermMask[i*4+j] < 0) + if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF) continue; // Undef, ignore it. - unsigned ByteSource = PermMask[i*4+j]; + unsigned ByteSource = + cast(PermMask.getOperand(i*4+j))->getZExtValue(); if ((ByteSource & 3) != j) { isFourElementShuffle = false; break; @@ -3512,8 +3509,12 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, unsigned BytesPerElement = EltVT.getSizeInBits()/8; SmallVector ResultMask; - for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { - unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; + for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { + unsigned SrcElt; + if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) + SrcElt = 0; + else + SrcElt = cast(PermMask.getOperand(i))->getZExtValue(); for (unsigned j = 0; j != BytesPerElement; ++j) ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, @@ -3703,12 +3704,13 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts); // Merge the results together. - int Ops[16]; + SDValue Ops[16]; for (unsigned i = 0; i != 8; ++i) { - Ops[i*2 ] = 2*i+1; - Ops[i*2+1] = 2*i+1+16; + Ops[i*2 ] = DAG.getConstant(2*i+1, MVT::i8); + Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8); } - return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, EvenParts, OddParts, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops, 16)); } else { assert(0 && "Unknown mul to lower!"); abort(); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 79464749724..01111cfb874 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -175,21 +175,19 @@ namespace llvm { namespace PPC { /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. - bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); + bool isVPKUHUMShuffleMask(SDNode *N, bool isUnary); /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. - bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); + bool isVPKUWUMShuffleMask(SDNode *N, bool isUnary); /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). - bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary); + bool isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary); /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). - bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary); + bool isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary); /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. @@ -198,7 +196,7 @@ namespace llvm { /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// VSPLTB/VSPLTH/VSPLTW. - bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); + bool isSplatShuffleMask(SDNode *N, unsigned EltSize); /// isAllNegativeZeroVector - Returns true if all elements of build_vector /// are -0.0. diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 9a5be79e816..c90fbc91015 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -15,118 +15,96 @@ // Altivec transformation functions and pattern fragments. // +/// VPKUHUM_shuffle_mask/VPKUWUM_shuffle_mask - Return true if this is a valid +/// shuffle mask for the VPKUHUM or VPKUWUM instructions. +def VPKUHUM_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVPKUHUMShuffleMask(N, false); +}]>; +def VPKUWUM_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVPKUWUMShuffleMask(N, false); +}]>; -def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUHUMShuffleMask(cast(N), false); +def VPKUHUM_unary_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVPKUHUMShuffleMask(N, true); }]>; -def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUWUMShuffleMask(cast(N), false); -}]>; -def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUHUMShuffleMask(cast(N), true); -}]>; -def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUWUMShuffleMask(cast(N), true); +def VPKUWUM_unary_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVPKUWUMShuffleMask(N, true); }]>; -def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast(N), 1, false); +def VMRGLB_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVMRGLShuffleMask(N, 1, false); }]>; -def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast(N), 2, false); +def VMRGLH_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVMRGLShuffleMask(N, 2, false); }]>; -def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast(N), 4, false); +def VMRGLW_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVMRGLShuffleMask(N, 4, false); }]>; -def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast(N), 1, false); +def VMRGHB_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVMRGHShuffleMask(N, 1, false); }]>; -def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast(N), 2, false); +def VMRGHH_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVMRGHShuffleMask(N, 2, false); }]>; -def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast(N), 4, false); +def VMRGHW_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVMRGHShuffleMask(N, 4, false); }]>; - -def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast(N), 1, true); +def VMRGLB_unary_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVMRGLShuffleMask(N, 1, true); }]>; -def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast(N), 2, true); +def VMRGLH_unary_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVMRGLShuffleMask(N, 2, true); }]>; -def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast(N), 4, true); +def VMRGLW_unary_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVMRGLShuffleMask(N, 4, true); }]>; -def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast(N), 1, true); +def VMRGHB_unary_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVMRGHShuffleMask(N, 1, true); }]>; -def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast(N), 2, true); +def VMRGHH_unary_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVMRGHShuffleMask(N, 2, true); }]>; -def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast(N), 4, true); +def VMRGHW_unary_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isVMRGHShuffleMask(N, 4, true); }]>; - -def VSLDOI_get_imm : SDNodeXForm; -def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ +def VSLDOI_shuffle_mask : PatLeaf<(build_vector), [{ return PPC::isVSLDOIShuffleMask(N, false) != -1; }], VSLDOI_get_imm>; - /// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into /// vector_shuffle(X,undef,mask) by the dag combiner. -def VSLDOI_unary_get_imm : SDNodeXForm; -def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ +def VSLDOI_unary_shuffle_mask : PatLeaf<(build_vector), [{ return PPC::isVSLDOIShuffleMask(N, true) != -1; }], VSLDOI_unary_get_imm>; // VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm. -def VSPLTB_get_imm : SDNodeXForm; -def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isSplatShuffleMask(cast(N), 1); +def VSPLTB_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isSplatShuffleMask(N, 1); }], VSPLTB_get_imm>; -def VSPLTH_get_imm : SDNodeXForm; -def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isSplatShuffleMask(cast(N), 2); +def VSPLTH_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isSplatShuffleMask(N, 2); }], VSPLTH_get_imm>; -def VSPLTW_get_imm : SDNodeXForm; -def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isSplatShuffleMask(cast(N), 4); +def VSPLTW_shuffle_mask : PatLeaf<(build_vector), [{ + return PPC::isSplatShuffleMask(N, 4); }], VSPLTW_get_imm>; @@ -290,7 +268,8 @@ def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>; def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH), "vsldoi $vD, $vA, $vB, $SH", VecFP, [(set VRRC:$vD, - (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>; + (vector_shuffle (v16i8 VRRC:$vA), VRRC:$vB, + VSLDOI_shuffle_mask:$SH))]>; // VX-Form instructions. AltiVec arithmetic ops. def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), @@ -366,22 +345,28 @@ def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>; def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghb $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), + VRRC:$vB, VMRGHB_shuffle_mask))]>; def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghh $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), + VRRC:$vB, VMRGHH_shuffle_mask))]>; def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghw $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), + VRRC:$vB, VMRGHW_shuffle_mask))]>; def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglb $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), + VRRC:$vB, VMRGLB_shuffle_mask))]>; def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglh $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), + VRRC:$vB, VMRGLH_shuffle_mask))]>; def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglw $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), + VRRC:$vB, VMRGLW_shuffle_mask))]>; def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>; def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>; @@ -455,16 +440,16 @@ def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>; def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vspltb $vD, $vB, $UIMM", VecPerm, - [(set VRRC:$vD, - (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; + [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef), + VSPLTB_shuffle_mask:$UIMM))]>; def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vsplth $vD, $vB, $UIMM", VecPerm, - [(set VRRC:$vD, - (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; + [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef), + VSPLTH_shuffle_mask:$UIMM))]>; def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vspltw $vD, $vB, $UIMM", VecPerm, - [(set VRRC:$vD, - (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; + [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef), + VSPLTW_shuffle_mask:$UIMM))]>; def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>; def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>; @@ -494,13 +479,13 @@ def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>; def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>; def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vpkuhum $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, - (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; + [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), + VRRC:$vB, VPKUHUM_shuffle_mask))]>; def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>; def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vpkuwum $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, - (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; + [(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), + VRRC:$vB, VPKUWUM_shuffle_mask))]>; def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>; // Vector Unpack. @@ -618,25 +603,25 @@ def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>; // Shuffles. // Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x) -def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef), - (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>; -def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef), +def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VSLDOI_unary_shuffle_mask:$in), + (VSLDOI VRRC:$vA, VRRC:$vA, VSLDOI_unary_shuffle_mask:$in)>; +def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,VPKUWUM_unary_shuffle_mask:$in), (VPKUWUM VRRC:$vA, VRRC:$vA)>; -def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef), +def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,VPKUHUM_unary_shuffle_mask:$in), (VPKUHUM VRRC:$vA, VRRC:$vA)>; // Match vmrg*(x,x) -def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef), +def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLB_unary_shuffle_mask:$in), (VMRGLB VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef), +def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLH_unary_shuffle_mask:$in), (VMRGLH VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef), +def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLW_unary_shuffle_mask:$in), (VMRGLW VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef), +def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHB_unary_shuffle_mask:$in), (VMRGHB VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef), +def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHH_unary_shuffle_mask:$in), (VMRGHH VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef), +def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHW_unary_shuffle_mask:$in), (VMRGHW VRRC:$vA, VRRC:$vA)>; // Logical Operations diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 02366020d77..d51435cf873 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -45,8 +45,7 @@ static cl::opt DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX")); // Forward declarations. -static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, - SDValue V2); +static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG, DebugLoc dl); X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) : TargetLowering(TM) { @@ -1668,7 +1667,9 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { // Special case: passing MMX values in XMM registers. Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); - Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); + Arg = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2i64, + DAG.getUNDEF(MVT::v2i64), Arg, + getMOVLMask(2, DAG, dl)); break; } } @@ -2137,156 +2138,186 @@ static bool hasFPCMov(unsigned X86CC) { } } -/// isUndefOrInRange - Return true if Val is undef or if its value falls within -/// the specified range (L, H]. -static bool isUndefOrInRange(int Val, int Low, int Hi) { - return (Val < 0) || (Val >= Low && Val < Hi); -} - -/// isUndefOrEqual - Val is either less than zero (undef) or equal to the -/// specified value. -static bool isUndefOrEqual(int Val, int CmpVal) { - if (Val < 0 || Val == CmpVal) +/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return +/// true if Op is undef or if its value falls within the specified range (L, H]. +static bool isUndefOrInRange(SDValue Op, unsigned Low, unsigned Hi) { + if (Op.getOpcode() == ISD::UNDEF) return true; - return false; + + unsigned Val = cast(Op)->getZExtValue(); + return (Val >= Low && Val < Hi); } -/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that -/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference -/// the second operand. -static bool isPSHUFDMask(const int *Mask, MVT VT) { - if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16) - return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4); - if (VT == MVT::v2f64 || VT == MVT::v2i64) - return (Mask[0] < 2 && Mask[1] < 2); - return false; +/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return +/// true if Op is undef or if its value equal to the specified value. +static bool isUndefOrEqual(SDValue Op, unsigned Val) { + if (Op.getOpcode() == ISD::UNDEF) + return true; + return cast(Op)->getZExtValue() == Val; } -bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) { - return ::isPSHUFDMask(N->getMask(), N->getValueType(0)); -} +/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to PSHUFD. +bool X86::isPSHUFDMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); -/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that -/// is suitable for input to PSHUFHW. -static bool isPSHUFHWMask(const int *Mask, MVT VT) { - if (VT != MVT::v8i16) + if (N->getNumOperands() != 2 && N->getNumOperands() != 4) return false; - - // Lower quadword copied in order or undef. - for (int i = 0; i != 4; ++i) - if (Mask[i] >= 0 && Mask[i] != i) + + // Check if the value doesn't reference the second vector. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + if (cast(Arg)->getZExtValue() >= e) return false; - + } + + return true; +} + +/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to PSHUFHW. +bool X86::isPSHUFHWMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 8) + return false; + + // Lower quadword copied in order. + for (unsigned i = 0; i != 4; ++i) { + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + if (cast(Arg)->getZExtValue() != i) + return false; + } + // Upper quadword shuffled. - for (int i = 4; i != 8; ++i) - if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7)) + for (unsigned i = 4; i != 8; ++i) { + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getZExtValue(); + if (Val < 4 || Val > 7) return false; - + } + return true; } -bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) { - return ::isPSHUFHWMask(N->getMask(), N->getValueType(0)); -} +/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to PSHUFLW. +bool X86::isPSHUFLWMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); -/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that -/// is suitable for input to PSHUFLW. -static bool isPSHUFLWMask(const int *Mask, MVT VT) { - if (VT != MVT::v8i16) + if (N->getNumOperands() != 8) return false; - - // Upper quadword copied in order. - for (int i = 4; i != 8; ++i) - if (Mask[i] >= 0 && Mask[i] != i) - return false; - - // Lower quadword shuffled. - for (int i = 0; i != 4; ++i) - if (Mask[i] >= 4) - return false; - - return true; -} -bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { - return ::isPSHUFLWMask(N->getMask(), N->getValueType(0)); + // Upper quadword copied in order. + for (unsigned i = 4; i != 8; ++i) + if (!isUndefOrEqual(N->getOperand(i), i)) + return false; + + // Lower quadword shuffled. + for (unsigned i = 0; i != 4; ++i) + if (!isUndefOrInRange(N->getOperand(i), 0, 4)) + return false; + + return true; } /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to SHUFP*. -static bool isSHUFPMask(const int *Mask, MVT VT) { - int NumElems = VT.getVectorNumElements(); - if (NumElems != 2 && NumElems != 4) - return false; - - int Half = NumElems / 2; - for (int i = 0; i < Half; ++i) - if (!isUndefOrInRange(Mask[i], 0, NumElems)) +template +static bool isSHUFPMask(SDOperand *Elems, unsigned NumElems) { + if (NumElems != 2 && NumElems != 4) return false; + + unsigned Half = NumElems / 2; + for (unsigned i = 0; i < Half; ++i) + if (!isUndefOrInRange(Elems[i], 0, NumElems)) return false; - for (int i = Half; i < NumElems; ++i) - if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) + for (unsigned i = Half; i < NumElems; ++i) + if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) return false; - + return true; } -bool X86::isSHUFPMask(ShuffleVectorSDNode *N) { - return ::isSHUFPMask(N->getMask(), N->getValueType(0)); +bool X86::isSHUFPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); } /// isCommutedSHUFP - Returns true if the shuffle mask is exactly /// the reverse of what x86 shuffles want. x86 shuffles requires the lower /// half elements to come from vector 1 (which would equal the dest.) and /// the upper half to come from vector 2. -static bool isCommutedSHUFPMask(const int *Mask, MVT VT) { - int NumElems = VT.getVectorNumElements(); - - if (NumElems != 2 && NumElems != 4) - return false; - - int Half = NumElems / 2; - for (int i = 0; i < Half; ++i) - if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) +template +static bool isCommutedSHUFP(SDOperand *Ops, unsigned NumOps) { + if (NumOps != 2 && NumOps != 4) return false; + + unsigned Half = NumOps / 2; + for (unsigned i = 0; i < Half; ++i) + if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) return false; - for (int i = Half; i < NumElems; ++i) - if (!isUndefOrInRange(Mask[i], 0, NumElems)) + for (unsigned i = Half; i < NumOps; ++i) + if (!isUndefOrInRange(Ops[i], 0, NumOps)) return false; return true; } -static bool isCommutedSHUFP(ShuffleVectorSDNode *N) { - return isCommutedSHUFPMask(N->getMask(), N->getValueType(0)); +static bool isCommutedSHUFP(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); } /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. -bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { - if (N->getValueType(0).getVectorNumElements() != 4) +bool X86::isMOVHLPSMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 4) return false; // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 - const int *Mask = N->getMask(); - return isUndefOrEqual(Mask[0], 6) && - isUndefOrEqual(Mask[1], 7) && - isUndefOrEqual(Mask[2], 2) && - isUndefOrEqual(Mask[3], 3); + return isUndefOrEqual(N->getOperand(0), 6) && + isUndefOrEqual(N->getOperand(1), 7) && + isUndefOrEqual(N->getOperand(2), 2) && + isUndefOrEqual(N->getOperand(3), 3); +} + +/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form +/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, +/// <2, 3, 2, 3> +bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 4) + return false; + + // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 + return isUndefOrEqual(N->getOperand(0), 2) && + isUndefOrEqual(N->getOperand(1), 3) && + isUndefOrEqual(N->getOperand(2), 2) && + isUndefOrEqual(N->getOperand(3), 3); } /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. -bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { - unsigned NumElems = N->getValueType(0).getVectorNumElements(); +bool X86::isMOVLPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4) return false; - const int *Mask = N->getMask(); for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(Mask[i], i + NumElems)) + if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) return false; for (unsigned i = NumElems/2; i < NumElems; ++i) - if (!isUndefOrEqual(Mask[i], i)) + if (!isUndefOrEqual(N->getOperand(i), i)) return false; return true; @@ -2295,49 +2326,37 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} /// and MOVLHPS. -bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { - unsigned NumElems = N->getValueType(0).getVectorNumElements(); +bool X86::isMOVHPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4) return false; - const int *Mask = N->getMask(); for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(Mask[i], i)) + if (!isUndefOrEqual(N->getOperand(i), i)) return false; - for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(Mask[i + NumElems/2], i + NumElems)) + for (unsigned i = 0; i < NumElems/2; ++i) { + SDValue Arg = N->getOperand(i + NumElems/2); + if (!isUndefOrEqual(Arg, i + NumElems)) return false; + } return true; } -/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form -/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, -/// <2, 3, 2, 3> -bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { - unsigned NumElems = N->getValueType(0).getVectorNumElements(); - - if (NumElems != 4) - return false; - - // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 - const int *Mask = N->getMask(); - return isUndefOrEqual(Mask[0], 2) && isUndefOrEqual(Mask[1], 3) && - isUndefOrEqual(Mask[2], 2) && isUndefOrEqual(Mask[3], 3); -} - /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. -static bool isUNPCKLMask(const int *Mask, MVT VT, bool V2IsSplat = false) { - int NumElts = VT.getVectorNumElements(); +template +bool static isUNPCKLMask(SDOperand *Elts, unsigned NumElts, + bool V2IsSplat = false) { if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - - for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; + + for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { + SDValue BitI = Elts[i]; + SDValue BitI1 = Elts[i+1]; if (!isUndefOrEqual(BitI, j)) return false; if (V2IsSplat) { @@ -2348,23 +2367,26 @@ static bool isUNPCKLMask(const int *Mask, MVT VT, bool V2IsSplat = false) { return false; } } + return true; } -bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { - return ::isUNPCKLMask(N->getMask(), N->getValueType(0), V2IsSplat); +bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); } /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. -static bool isUNPCKHMask(const int *Mask, MVT VT, bool V2IsSplat = false) { - int NumElts = VT.getVectorNumElements(); +template +bool static isUNPCKHMask(SDOperand *Elts, unsigned NumElts, + bool V2IsSplat = false) { if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - - for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; + + for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { + SDValue BitI = Elts[i]; + SDValue BitI1 = Elts[i+1]; if (!isUndefOrEqual(BitI, j + NumElts/2)) return false; if (V2IsSplat) { @@ -2375,166 +2397,270 @@ static bool isUNPCKHMask(const int *Mask, MVT VT, bool V2IsSplat = false) { return false; } } + return true; } -bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) { - return ::isUNPCKHMask(N->getMask(), N->getValueType(0), V2IsSplat); +bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); } /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, /// <0, 0, 1, 1> -static bool isUNPCKL_v_undef_Mask(const int *Mask, MVT VT) { - int NumElems = VT.getVectorNumElements(); +bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - - for (int i = 0, j = 0; i != NumElems; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; + + for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { + SDValue BitI = N->getOperand(i); + SDValue BitI1 = N->getOperand(i+1); + if (!isUndefOrEqual(BitI, j)) return false; if (!isUndefOrEqual(BitI1, j)) return false; } - return true; -} -bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) { - return ::isUNPCKL_v_undef_Mask(N->getMask(), N->getValueType(0)); + return true; } /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, /// <2, 2, 3, 3> -static bool isUNPCKH_v_undef_Mask(const int *Mask, MVT VT) { - int NumElems = VT.getVectorNumElements(); +bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + unsigned NumElems = N->getNumOperands(); if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - - for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; + + for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { + SDValue BitI = N->getOperand(i); + SDValue BitI1 = N->getOperand(i + 1); + if (!isUndefOrEqual(BitI, j)) return false; if (!isUndefOrEqual(BitI1, j)) return false; } - return true; -} -bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) { - return ::isUNPCKH_v_undef_Mask(N->getMask(), N->getValueType(0)); + return true; } /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. -static bool isMOVLMask(const int *Mask, MVT VT) { - int NumElts = VT.getVectorNumElements(); +template +static bool isMOVLMask(SDOperand *Elts, unsigned NumElts) { if (NumElts != 2 && NumElts != 4) return false; - - if (!isUndefOrEqual(Mask[0], NumElts)) + + if (!isUndefOrEqual(Elts[0], NumElts)) return false; - - for (int i = 1; i < NumElts; ++i) - if (!isUndefOrEqual(Mask[i], i)) + + for (unsigned i = 1; i < NumElts; ++i) { + if (!isUndefOrEqual(Elts[i], i)) return false; - + } + return true; } -bool X86::isMOVLMask(ShuffleVectorSDNode *N) { - return ::isMOVLMask(N->getMask(), N->getValueType(0)); +bool X86::isMOVLMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + return ::isMOVLMask(N->op_begin(), N->getNumOperands()); } /// isCommutedMOVL - Returns true if the shuffle mask is except the reverse /// of what x86 movss want. X86 movs requires the lowest element to be lowest /// element of vector 2 and the other elements to come from vector 1 in order. -static bool isCommutedMOVLMask(const int *Mask, MVT VT, bool V2IsSplat = false, - bool V2IsUndef = false) { - int NumOps = VT.getVectorNumElements(); +template +static bool isCommutedMOVL(SDOperand *Ops, unsigned NumOps, + bool V2IsSplat = false, + bool V2IsUndef = false) { if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) return false; - - if (!isUndefOrEqual(Mask[0], 0)) + + if (!isUndefOrEqual(Ops[0], 0)) return false; - - for (int i = 1; i < NumOps; ++i) - if (!(isUndefOrEqual(Mask[i], i+NumOps) || - (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) || - (V2IsSplat && isUndefOrEqual(Mask[i], NumOps)))) + + for (unsigned i = 1; i < NumOps; ++i) { + SDValue Arg = Ops[i]; + if (!(isUndefOrEqual(Arg, i+NumOps) || + (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) || + (V2IsSplat && isUndefOrEqual(Arg, NumOps)))) return false; - + } + return true; } -static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false, +static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, bool V2IsUndef = false) { - return isCommutedMOVLMask(N->getMask(), N->getValueType(0), V2IsSplat, - V2IsUndef); + assert(N->getOpcode() == ISD::BUILD_VECTOR); + return isCommutedMOVL(N->op_begin(), N->getNumOperands(), + V2IsSplat, V2IsUndef); } /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. -bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) { - if (N->getValueType(0).getVectorNumElements() != 4) +bool X86::isMOVSHDUPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 4) return false; // Expect 1, 1, 3, 3 - const int *Mask = N->getMask(); - for (unsigned i = 0; i < 2; ++i) - if (Mask[i] >=0 && Mask[i] != 1) - return false; + for (unsigned i = 0; i < 2; ++i) { + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getZExtValue(); + if (Val != 1) return false; + } bool HasHi = false; for (unsigned i = 2; i < 4; ++i) { - if (Mask[i] >= 0 && Mask[i] != 3) - return false; - if (Mask[i] == 3) - HasHi = true; + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getZExtValue(); + if (Val != 3) return false; + HasHi = true; } + // Don't use movshdup if it can be done with a shufps. - // FIXME: verify that matching u, u, 3, 3 is what we want. return HasHi; } /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. -bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) { - if (N->getValueType(0).getVectorNumElements() != 4) +bool X86::isMOVSLDUPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 4) return false; // Expect 0, 0, 2, 2 - const int *Mask = N->getMask(); - for (unsigned i = 0; i < 2; ++i) - if (Mask[i] > 0) - return false; + for (unsigned i = 0; i < 2; ++i) { + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getZExtValue(); + if (Val != 0) return false; + } bool HasHi = false; for (unsigned i = 2; i < 4; ++i) { - if (Mask[i] >= 0 && Mask[i] != 2) - return false; - if (Mask[i] == 2) - HasHi = true; + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getZExtValue(); + if (Val != 2) return false; + HasHi = true; } - // Don't use movsldup if it can be done with a shufps. + + // Don't use movshdup if it can be done with a shufps. return HasHi; } +/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a identity operation on the LHS or RHS. +static bool isIdentityMask(SDNode *N, bool RHS = false) { + unsigned NumElems = N->getNumOperands(); + for (unsigned i = 0; i < NumElems; ++i) + if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0))) + return false; + return true; +} + +/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies +/// a splat of a single element. +static bool isSplatMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + // This is a splat operation if each element of the permute is the same, and + // if the value doesn't reference the second vector. + unsigned NumElems = N->getNumOperands(); + SDValue ElementBase; + unsigned i = 0; + for (; i != NumElems; ++i) { + SDValue Elt = N->getOperand(i); + if (isa(Elt)) { + ElementBase = Elt; + break; + } + } + + if (!ElementBase.getNode()) + return false; + + for (; i != NumElems; ++i) { + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + if (Arg != ElementBase) return false; + } + + // Make sure it is a splat of the first vector operand. + return cast(ElementBase)->getZExtValue() < NumElems; +} + +/// getSplatMaskEltNo - Given a splat mask, return the index to the element +/// we want to splat. +static SDValue getSplatMaskEltNo(SDNode *N) { + assert(isSplatMask(N) && "Not a splat mask"); + unsigned NumElems = N->getNumOperands(); + SDValue ElementBase; + unsigned i = 0; + for (; i != NumElems; ++i) { + SDValue Elt = N->getOperand(i); + if (isa(Elt)) + return Elt; + } + assert(0 && " No splat value found!"); + return SDValue(); +} + + +/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies +/// a splat of a single element and it's a 2 or 4 element mask. +bool X86::isSplatMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + // We can only splat 64-bit, and 32-bit quantities with a single instruction. + if (N->getNumOperands() != 4 && N->getNumOperands() != 2) + return false; + return ::isSplatMask(N); +} + +/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a splat of zero element. +bool X86::isSplatLoMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) + if (!isUndefOrEqual(N->getOperand(i), 0)) + return false; + return true; +} + /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVDDUP. -bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { - int e = N->getValueType(0).getVectorNumElements() / 2; - const int *Mask = N->getMask(); - - for (int i = 0; i < e; ++i) - if (!isUndefOrEqual(Mask[i], i)) +bool X86::isMOVDDUPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + unsigned e = N->getNumOperands() / 2; + for (unsigned i = 0; i < e; ++i) + if (!isUndefOrEqual(N->getOperand(i), i)) return false; - for (int i = 0; i < e; ++i) - if (!isUndefOrEqual(Mask[e+i], i)) + for (unsigned i = 0; i < e; ++i) + if (!isUndefOrEqual(N->getOperand(e+i), i)) return false; return true; } @@ -2543,20 +2669,20 @@ bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// instructions. unsigned X86::getShuffleSHUFImmediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast(N); - int NumOperands = SVOp->getValueType(0).getVectorNumElements(); - const int *MaskP = SVOp->getMask(); - + unsigned NumOperands = N->getNumOperands(); unsigned Shift = (NumOperands == 4) ? 2 : 1; unsigned Mask = 0; - for (int i = 0; i < NumOperands; ++i) { - int Val = MaskP[NumOperands-i-1]; - if (Val < 0) Val = 0; + for (unsigned i = 0; i < NumOperands; ++i) { + unsigned Val = 0; + SDValue Arg = N->getOperand(NumOperands-i-1); + if (Arg.getOpcode() != ISD::UNDEF) + Val = cast(Arg)->getZExtValue(); if (Val >= NumOperands) Val -= NumOperands; Mask |= Val; if (i != NumOperands - 1) Mask <<= Shift; } + return Mask; } @@ -2564,16 +2690,19 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW /// instructions. unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { - const int *MaskP = cast(N)->getMask(); unsigned Mask = 0; // 8 nodes, but we only care about the last 4. for (unsigned i = 7; i >= 4; --i) { - int Val = MaskP[i]; - if (Val >= 0) + unsigned Val = 0; + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() != ISD::UNDEF) { + Val = cast(Arg)->getZExtValue(); Mask |= (Val - 4); + } if (i != 4) Mask <<= 2; } + return Mask; } @@ -2581,71 +2710,90 @@ unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW /// instructions. unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { - const int *MaskP = cast(N)->getMask(); unsigned Mask = 0; // 8 nodes, but we only care about the first 4. for (int i = 3; i >= 0; --i) { - int Val = MaskP[i]; - if (Val >= 0) - Mask |= Val; + unsigned Val = 0; + SDValue Arg = N->getOperand(i); + if (Arg.getOpcode() != ISD::UNDEF) + Val = cast(Arg)->getZExtValue(); + Mask |= Val; if (i != 0) Mask <<= 2; } + return Mask; } -/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in -/// their permute mask. -static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG) { - MVT VT = SVOp->getValueType(0); - int NumElems = VT.getVectorNumElements(); - const int *Mask = SVOp->getMask(); - SmallVector MaskVec; - - for (int i = 0; i != NumElems; ++i) { - int idx = Mask[i]; - if (idx < 0) - MaskVec.push_back(idx); - else if (idx < NumElems) - MaskVec.push_back(idx + NumElems); +/// CommuteVectorShuffle - Swap vector_shuffle operands as well as +/// values in ther permute mask. +static SDValue CommuteVectorShuffle(SDValue Op, SDValue &V1, + SDValue &V2, SDValue &Mask, + SelectionDAG &DAG) { + MVT VT = Op.getValueType(); + MVT MaskVT = Mask.getValueType(); + MVT EltVT = MaskVT.getVectorElementType(); + unsigned NumElems = Mask.getNumOperands(); + SmallVector MaskVec; + DebugLoc dl = Op.getDebugLoc(); + + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) { + MaskVec.push_back(DAG.getUNDEF(EltVT)); + continue; + } + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getZExtValue(); + if (Val < NumElems) + MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); else - MaskVec.push_back(idx - NumElems); + MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); } - return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1), - SVOp->getOperand(0), &MaskVec[0]); + + std::swap(V1, V2); + Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &MaskVec[0], NumElems); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, Mask); } /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming /// the two vector operands have swapped position. -static void CommuteVectorShuffleMask(SmallVectorImpl &Mask, MVT VT) { - int NumElems = VT.getVectorNumElements(); - for (int i = 0; i != NumElems; ++i) { - int idx = Mask[i]; - if (idx < 0) +static +SDValue CommuteVectorShuffleMask(SDValue Mask, SelectionDAG &DAG, DebugLoc dl) { + MVT MaskVT = Mask.getValueType(); + MVT EltVT = MaskVT.getVectorElementType(); + unsigned NumElems = Mask.getNumOperands(); + SmallVector MaskVec; + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) { + MaskVec.push_back(DAG.getUNDEF(EltVT)); continue; - else if (idx < NumElems) - Mask[i] = idx + NumElems; + } + assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Val = cast(Arg)->getZExtValue(); + if (Val < NumElems) + MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); else - Mask[i] = idx - NumElems; + MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); } + return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &MaskVec[0], NumElems); } + /// ShouldXformToMOVHLPS - Return true if the node should be transformed to /// match movhlps. The lower half elements should come from upper half of /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). -static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) { - int NumElems = Op->getValueType(0).getVectorNumElements(); - const int *Mask = Op->getMask(); - +static bool ShouldXformToMOVHLPS(SDNode *Mask) { + unsigned NumElems = Mask->getNumOperands(); if (NumElems != 4) return false; for (unsigned i = 0, e = 2; i != e; ++i) - if (!isUndefOrEqual(Mask[i], i+2)) + if (!isUndefOrEqual(Mask->getOperand(i), i+2)) return false; for (unsigned i = 2; i != 4; ++i) - if (!isUndefOrEqual(Mask[i], i+4)) + if (!isUndefOrEqual(Mask->getOperand(i), i+4)) return false; return true; } @@ -2669,8 +2817,7 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). And since V1 will become the source of the /// MOVLP, it must be either a vector load or a scalar load to vector. -static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, - ShuffleVectorSDNode *Op) { +static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) return false; // Is V2 is a vector load, don't do this transformation. We will try to use @@ -2678,16 +2825,14 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, if (ISD::isNON_EXTLoad(V2)) return false; - int NumElems = Op->getValueType(0).getVectorNumElements(); - const int *Mask = Op->getMask(); - + unsigned NumElems = Mask->getNumOperands(); if (NumElems != 2 && NumElems != 4) return false; - for (int i = 0, e = NumElems/2; i != e; ++i) - if (!isUndefOrEqual(Mask[i], i)) + for (unsigned i = 0, e = NumElems/2; i != e; ++i) + if (!isUndefOrEqual(Mask->getOperand(i), i)) return false; - for (int i = NumElems/2; i != NumElems; ++i) - if (!isUndefOrEqual(Mask[i], i+NumElems)) + for (unsigned i = NumElems/2; i != NumElems; ++i) + if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) return false; return true; } @@ -2705,6 +2850,29 @@ static bool isSplatVector(SDNode *N) { return true; } +/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved +/// to an undef. +static bool isUndefShuffle(SDNode *N) { + if (N->getOpcode() != ISD::VECTOR_SHUFFLE) + return false; + + SDValue V1 = N->getOperand(0); + SDValue V2 = N->getOperand(1); + SDValue Mask = N->getOperand(2); + unsigned NumElems = Mask.getNumOperands(); + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() != ISD::UNDEF) { + unsigned Val = cast(Arg)->getZExtValue(); + if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) + return false; + else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) + return false; + } + } + return true; +} + /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. static inline bool isZeroNode(SDValue Elt) { @@ -2715,26 +2883,34 @@ static inline bool isZeroNode(SDValue Elt) { } /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved -/// to an zero vector. -/// FIXME: move to dag combiner? -static bool isZeroShuffle(ShuffleVectorSDNode *N) { +/// to an zero vector. +static bool isZeroShuffle(SDNode *N) { + if (N->getOpcode() != ISD::VECTOR_SHUFFLE) + return false; + SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); - const int *Mask = N->getMask(); - int NumElems = N->getValueType(0).getVectorNumElements(); - for (int i = 0; i != NumElems; ++i) { - int Idx = Mask[i]; - if (Idx >= NumElems) { - unsigned Opc = V2.getOpcode(); - if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) - continue; - if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems))) - return false; - } else if (Idx >= 0) { - unsigned Opc = V1.getOpcode(); + SDValue Mask = N->getOperand(2); + unsigned NumElems = Mask.getNumOperands(); + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) + continue; + + unsigned Idx = cast(Arg)->getZExtValue(); + if (Idx < NumElems) { + unsigned Opc = V1.getNode()->getOpcode(); if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx))) + if (Opc != ISD::BUILD_VECTOR || + !isZeroNode(V1.getNode()->getOperand(Idx))) + return false; + } else if (Idx >= NumElems) { + unsigned Opc = V2.getNode()->getOpcode(); + if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) + continue; + if (Opc != ISD::BUILD_VECTOR || + !isZeroNode(V2.getNode()->getOperand(Idx - NumElems))) return false; } } @@ -2782,94 +2958,127 @@ static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements /// that point to V2 points to its first element. -static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - MVT VT = SVOp->getValueType(0); - int NumElems = VT.getVectorNumElements(); - const int *Mask = SVOp->getMask(); - +static SDValue NormalizeMask(SDValue Mask, SelectionDAG &DAG) { + assert(Mask.getOpcode() == ISD::BUILD_VECTOR); + bool Changed = false; - SmallVector MaskVec; - - for (int i = 0; i != NumElems; ++i) { - int idx = Mask[i]; - if (idx > NumElems) { - idx = NumElems; - Changed = true; + SmallVector MaskVec; + unsigned NumElems = Mask.getNumOperands(); + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Arg = Mask.getOperand(i); + if (Arg.getOpcode() != ISD::UNDEF) { + unsigned Val = cast(Arg)->getZExtValue(); + if (Val > NumElems) { + Arg = DAG.getConstant(NumElems, Arg.getValueType()); + Changed = true; + } } - MaskVec.push_back(idx); + MaskVec.push_back(Arg); } + if (Changed) - return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0), - SVOp->getOperand(1), &MaskVec[0]); - return SDValue(SVOp, 0); + Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getDebugLoc(), + Mask.getValueType(), + &MaskVec[0], MaskVec.size()); + return Mask; } /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd /// operation of specified width. -static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, - SDValue V2) { - unsigned NumElems = VT.getVectorNumElements(); - SmallVector Mask; - Mask.push_back(NumElems); +static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG, DebugLoc dl) { + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT BaseVT = MaskVT.getVectorElementType(); + + SmallVector MaskVec; + MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); for (unsigned i = 1; i != NumElems; ++i) - Mask.push_back(i); - return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); + MaskVec.push_back(DAG.getConstant(i, BaseVT)); + return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); } -/// getUnpackl - Returns a vector_shuffle node for an unpackl operation. -static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, - SDValue V2) { - unsigned NumElems = VT.getVectorNumElements(); - SmallVector Mask; +/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation +/// of specified width. +static SDValue getUnpacklMask(unsigned NumElems, SelectionDAG &DAG, + DebugLoc dl) { + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT BaseVT = MaskVT.getVectorElementType(); + SmallVector MaskVec; for (unsigned i = 0, e = NumElems/2; i != e; ++i) { - Mask.push_back(i); - Mask.push_back(i + NumElems); + MaskVec.push_back(DAG.getConstant(i, BaseVT)); + MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); } - return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); + return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); } -/// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation. -static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, - SDValue V2) { - unsigned NumElems = VT.getVectorNumElements(); +/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation +/// of specified width. +static SDValue getUnpackhMask(unsigned NumElems, SelectionDAG &DAG, + DebugLoc dl) { + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT BaseVT = MaskVT.getVectorElementType(); unsigned Half = NumElems/2; - SmallVector Mask; + SmallVector MaskVec; for (unsigned i = 0; i != Half; ++i) { - Mask.push_back(i + Half); - Mask.push_back(i + NumElems + Half); + MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); + MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); } - return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); + return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); +} + +/// getSwapEltZeroMask - Returns a vector_shuffle mask for a shuffle that swaps +/// element #0 of a vector with the specified index, leaving the rest of the +/// elements in place. +static SDValue getSwapEltZeroMask(unsigned NumElems, unsigned DestElt, + SelectionDAG &DAG, DebugLoc dl) { + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT BaseVT = MaskVT.getVectorElementType(); + SmallVector MaskVec; + // Element #0 of the result gets the elt we are replacing. + MaskVec.push_back(DAG.getConstant(DestElt, BaseVT)); + for (unsigned i = 1; i != NumElems; ++i) + MaskVec.push_back(DAG.getConstant(i == DestElt ? 0 : i, BaseVT)); + return DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); } /// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32. -static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, - bool HasSSE2) { - if (SV->getValueType(0).getVectorNumElements() <= 4) - return SDValue(SV, 0); - - MVT PVT = MVT::v4f32; - MVT VT = SV->getValueType(0); - DebugLoc dl = SV->getDebugLoc(); - SDValue V1 = SV->getOperand(0); - int NumElems = VT.getVectorNumElements(); - int EltNo = SV->getSplatIndex(); - - // unpack elements to the correct location - while (NumElems > 4) { - if (EltNo < NumElems/2) { - V1 = getUnpackl(DAG, dl, VT, V1, V1); - } else { - V1 = getUnpackh(DAG, dl, VT, V1, V1); - EltNo -= NumElems/2; +static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) { + MVT PVT = HasSSE2 ? MVT::v4i32 : MVT::v4f32; + MVT VT = Op.getValueType(); + if (PVT == VT) + return Op; + SDValue V1 = Op.getOperand(0); + SDValue Mask = Op.getOperand(2); + unsigned MaskNumElems = Mask.getNumOperands(); + unsigned NumElems = MaskNumElems; + DebugLoc dl = Op.getDebugLoc(); + // Special handling of v4f32 -> v4i32. + if (VT != MVT::v4f32) { + // Find which element we want to splat. + SDNode* EltNoNode = getSplatMaskEltNo(Mask.getNode()).getNode(); + unsigned EltNo = cast(EltNoNode)->getZExtValue(); + // unpack elements to the correct location + while (NumElems > 4) { + if (EltNo < NumElems/2) { + Mask = getUnpacklMask(MaskNumElems, DAG, dl); + } else { + Mask = getUnpackhMask(MaskNumElems, DAG, dl); + EltNo -= NumElems/2; + } + V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1, Mask); + NumElems >>= 1; } - NumElems >>= 1; + SDValue Cst = DAG.getConstant(EltNo, MVT::i32); + Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); } - - // Perform the splat. - int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; + V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); - V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1); + SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, PVT, V1, + DAG.getUNDEF(PVT), Mask); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuffle); } /// isVectorLoad - Returns true if the node is a vector load, a scalar @@ -2886,28 +3095,32 @@ static bool isVectorLoad(SDValue Op) { /// CanonicalizeMovddup - Cannonicalize movddup shuffle to v2f64. /// -static SDValue CanonicalizeMovddup(ShuffleVectorSDNode *SV, SelectionDAG &DAG, - bool HasSSE3) { +static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask, + SelectionDAG &DAG, bool HasSSE3) { // If we have sse3 and shuffle has more than one use or input is a load, then // use movddup. Otherwise, use movlhps. - SDValue V1 = SV->getOperand(0); - - bool UseMovddup = HasSSE3 && (!SV->hasOneUse() || isVectorLoad(V1)); + bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1)); MVT PVT = UseMovddup ? MVT::v2f64 : MVT::v4f32; - MVT VT = SV->getValueType(0); + MVT VT = Op.getValueType(); if (VT == PVT) - return SDValue(SV, 0); - - DebugLoc dl = SV->getDebugLoc(); - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); - if (PVT.getVectorNumElements() == 2) { - int Mask[2] = { 0, 0 }; - V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), Mask); + return Op; + DebugLoc dl = Op.getDebugLoc(); + unsigned NumElems = PVT.getVectorNumElements(); + if (NumElems == 2) { + SDValue Cst = DAG.getTargetConstant(0, MVT::i32); + Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); } else { - int Mask[4] = { 0, 1, 0, 1 }; - V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), Mask); + assert(NumElems == 4); + SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32); + SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32); + Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + Cst0, Cst1, Cst0, Cst1); } - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1); + + V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); + SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, PVT, V1, + DAG.getUNDEF(PVT), Mask); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuffle); } /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified @@ -2917,31 +3130,39 @@ static SDValue CanonicalizeMovddup(ShuffleVectorSDNode *SV, SelectionDAG &DAG, static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, bool isZero, bool HasSSE2, SelectionDAG &DAG) { + DebugLoc dl = V2.getDebugLoc(); MVT VT = V2.getValueType(); SDValue V1 = isZero - ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT); - unsigned NumElems = VT.getVectorNumElements(); - SmallVector MaskVec; + ? getZeroVector(VT, HasSSE2, DAG, dl) : DAG.getUNDEF(VT); + unsigned NumElems = V2.getValueType().getVectorNumElements(); + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT EVT = MaskVT.getVectorElementType(); + SmallVector MaskVec; for (unsigned i = 0; i != NumElems; ++i) - // If this is the insertion idx, put the low elt of V2 here. - MaskVec.push_back(i == Idx ? NumElems : i); - return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]); + if (i == Idx) // If this is the insertion idx, put the low elt of V2 here. + MaskVec.push_back(DAG.getConstant(NumElems, EVT)); + else + MaskVec.push_back(DAG.getConstant(i, EVT)); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, Mask); } /// getNumOfConsecutiveZeros - Return the number of elements in a result of /// a shuffle that is zero. static -unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, const int *Mask, - int NumElems, bool Low, SelectionDAG &DAG) { +unsigned getNumOfConsecutiveZeros(SDValue Op, SDValue Mask, + unsigned NumElems, bool Low, + SelectionDAG &DAG) { unsigned NumZeros = 0; - for (int i = 0; i < NumElems; ++i) { + for (unsigned i = 0; i < NumElems; ++i) { unsigned Index = Low ? i : NumElems-i-1; - int Idx = Mask[Index]; - if (Idx < 0) { + SDValue Idx = Mask.getOperand(Index); + if (Idx.getOpcode() == ISD::UNDEF) { ++NumZeros; continue; } - SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index); + SDValue Elt = DAG.getShuffleScalarElt(Op.getNode(), Index); if (Elt.getNode() && isZeroNode(Elt)) ++NumZeros; else @@ -2952,40 +3173,40 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, const int *Mask, /// isVectorShift - Returns true if the shuffle can be implemented as a /// logical left or right shift of a vector. -/// FIXME: split into pslldqi, psrldqi, palignr variants. -static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, +static bool isVectorShift(SDValue Op, SDValue Mask, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { - const int *Mask = SVOp->getMask(); - int NumElems = SVOp->getValueType(0).getVectorNumElements(); + unsigned NumElems = Mask.getNumOperands(); isLeft = true; - unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, Mask, NumElems, true, DAG); + unsigned NumZeros= getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG); if (!NumZeros) { isLeft = false; - NumZeros = getNumOfConsecutiveZeros(SVOp, Mask, NumElems, false, DAG); + NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG); if (!NumZeros) return false; } + bool SeenV1 = false; bool SeenV2 = false; - for (int i = NumZeros; i < NumElems; ++i) { - int Val = isLeft ? (i - NumZeros) : i; - int Idx = Mask[isLeft ? i : (i - NumZeros)]; - if (Idx < 0) + for (unsigned i = NumZeros; i < NumElems; ++i) { + unsigned Val = isLeft ? (i - NumZeros) : i; + SDValue Idx = Mask.getOperand(isLeft ? i : (i - NumZeros)); + if (Idx.getOpcode() == ISD::UNDEF) continue; - if (Idx < NumElems) + unsigned Index = cast(Idx)->getZExtValue(); + if (Index < NumElems) SeenV1 = true; else { - Idx -= NumElems; + Index -= NumElems; SeenV2 = true; } - if (Idx != Val) + if (Index != Val) return false; } if (SeenV1 && SeenV2) return false; - ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1); + ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1); ShAmt = NumZeros; return true; } @@ -3070,8 +3291,8 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, /// getVShift - Return a vector logical shift node. /// static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp, - unsigned NumBits, SelectionDAG &DAG, - const TargetLowering &TLI, DebugLoc dl) { + unsigned NumBits, SelectionDAG &DAG, + const TargetLowering &TLI, DebugLoc dl) { bool isMMX = VT.getSizeInBits() == 64; MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL; @@ -3156,13 +3377,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // Now we have our 32-bit value zero extended in the low element of // a vector. If Idx != 0, swizzle it into place. if (Idx != 0) { - SmallVector Mask; - Mask.push_back(Idx); - for (unsigned i = 1; i != VecElts; ++i) - Mask.push_back(i); - Item = DAG.getVectorShuffle(VecVT, dl, Item, - DAG.getUNDEF(Item.getValueType()), - &Mask[0]); + SDValue Ops[] = { + Item, DAG.getUNDEF(Item.getValueType()), + getSwapEltZeroMask(VecElts, Idx, DAG, dl) + }; + Item = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VecVT, Ops, 3); } return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item); } @@ -3206,10 +3425,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // Turn it into a shuffle of zero and zero-extended scalar to vector. Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget->hasSSE2(), DAG); - SmallVector MaskVec; + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT MaskEVT = MaskVT.getVectorElementType(); + SmallVector MaskVec; for (unsigned i = 0; i < NumElems; i++) - MaskVec.push_back(i == Idx ? 0 : 1); - return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]); + MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, Item, + DAG.getUNDEF(VT), Mask); } } @@ -3267,48 +3491,54 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { V[i] = V[i*2]; // Must be a zero vector. break; case 1: - V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]); + V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2+1], V[i*2], + getMOVLMask(NumElems, DAG, dl)); break; case 2: - V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]); + V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2], V[i*2+1], + getMOVLMask(NumElems, DAG, dl)); break; case 3: - V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]); + V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i*2], V[i*2+1], + getUnpacklMask(NumElems, DAG, dl)); break; } } - SmallVector MaskVec; + MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT EVT = MaskVT.getVectorElementType(); + SmallVector MaskVec; bool Reverse = (NonZeros & 0x3) == 2; for (unsigned i = 0; i < 2; ++i) - MaskVec.push_back(Reverse ? 1-i : i); + if (Reverse) + MaskVec.push_back(DAG.getConstant(1-i, EVT)); + else + MaskVec.push_back(DAG.getConstant(i, EVT)); Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; for (unsigned i = 0; i < 2; ++i) - MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems); - return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]); + if (Reverse) + MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); + else + MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); + SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size()); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[0], V[1], ShufMask); } if (Values.size() > 2) { - // If we have SSE 4.1, Expand into a number of inserts. - if (getSubtarget()->hasSSE41()) { - V[0] = DAG.getUNDEF(VT); - for (unsigned i = 0; i < NumElems; ++i) - if (Op.getOperand(i).getOpcode() != ISD::UNDEF) - V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0], - Op.getOperand(i), DAG.getIntPtrConstant(i)); - return V[0]; - } // Expand into a number of unpckl*. // e.g. for v4f32 // Step 1: unpcklps 0, 2 ==> X: // : unpcklps 1, 3 ==> Y: // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> + SDValue UnpckMask = getUnpacklMask(NumElems, DAG, dl); for (unsigned i = 0; i < NumElems; ++i) V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); NumElems >>= 1; while (NumElems != 0) { for (unsigned i = 0; i < NumElems; ++i) - V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]); + V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V[i], V[i + NumElems], + UnpckMask); NumElems >>= 1; } return V[0]; @@ -3323,12 +3553,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // 3. [ssse3] 2 x pshufb + 1 x por // 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw) static -SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, X86TargetLowering &TLI) { - SDValue V1 = SVOp->getOperand(0); - SDValue V2 = SVOp->getOperand(1); - DebugLoc dl = SVOp->getDebugLoc(); - const int *Mask = SVOp->getMask(); +SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, + SDValue PermMask, SelectionDAG &DAG, + X86TargetLowering &TLI, DebugLoc dl) { + SmallVector MaskElts(PermMask.getNode()->op_begin(), + PermMask.getNode()->op_end()); SmallVector MaskVals; // Determine if more than 1 of the words in each of the low and high quadwords @@ -3339,7 +3568,9 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, BitVector InputQuads(4); for (unsigned i = 0; i < 8; ++i) { SmallVectorImpl &Quad = i < 4 ? LoQuad : HiQuad; - int EltIdx = Mask[i]; + SDValue Elt = MaskElts[i]; + int EltIdx = Elt.getOpcode() == ISD::UNDEF ? -1 : + cast(Elt)->getZExtValue(); MaskVals.push_back(EltIdx); if (EltIdx < 0) { ++Quad[0]; @@ -3392,12 +3623,14 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // words from all 4 input quadwords. SDValue NewV; if (BestLoQuad >= 0 || BestHiQuad >= 0) { - SmallVector MaskV; - MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad); - MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad); - NewV = DAG.getVectorShuffle(MVT::v2i64, dl, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]); + SmallVector MaskV; + MaskV.push_back(DAG.getConstant(BestLoQuad < 0 ? 0 : BestLoQuad, MVT::i64)); + MaskV.push_back(DAG.getConstant(BestHiQuad < 0 ? 1 : BestHiQuad, MVT::i64)); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, &MaskV[0], 2); + + NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2i64, + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), Mask); NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV); // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the @@ -3435,8 +3668,15 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // If we've eliminated the use of V2, and the new mask is a pshuflw or // pshufhw, that's as cheap as it gets. Return the new shuffle. if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) { - return DAG.getVectorShuffle(MVT::v8i16, dl, NewV, - DAG.getUNDEF(MVT::v8i16), &MaskVals[0]); + MaskV.clear(); + for (unsigned i = 0; i != 8; ++i) + MaskV.push_back((MaskVals[i] < 0) ? DAG.getUNDEF(MVT::i16) + : DAG.getConstant(MaskVals[i], + MVT::i16)); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, + DAG.getUNDEF(MVT::v8i16), + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, + &MaskV[0], 8)); } } @@ -3493,45 +3733,49 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // and update MaskVals with new element order. BitVector InOrder(8); if (BestLoQuad >= 0) { - SmallVector MaskV; + SmallVector MaskV; for (int i = 0; i != 4; ++i) { int idx = MaskVals[i]; if (idx < 0) { - MaskV.push_back(-1); + MaskV.push_back(DAG.getUNDEF(MVT::i16)); InOrder.set(i); } else if ((idx / 4) == BestLoQuad) { - MaskV.push_back(idx & 3); + MaskV.push_back(DAG.getConstant(idx & 3, MVT::i16)); InOrder.set(i); } else { - MaskV.push_back(-1); + MaskV.push_back(DAG.getUNDEF(MVT::i16)); } } for (unsigned i = 4; i != 8; ++i) - MaskV.push_back(i); - NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), - &MaskV[0]); + MaskV.push_back(DAG.getConstant(i, MVT::i16)); + NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, + DAG.getUNDEF(MVT::v8i16), + DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::v8i16, &MaskV[0], 8)); } // If BestHi >= 0, generate a pshufhw to put the high elements in order, // and update MaskVals with the new element order. if (BestHiQuad >= 0) { - SmallVector MaskV; + SmallVector MaskV; for (unsigned i = 0; i != 4; ++i) - MaskV.push_back(i); + MaskV.push_back(DAG.getConstant(i, MVT::i16)); for (unsigned i = 4; i != 8; ++i) { int idx = MaskVals[i]; if (idx < 0) { - MaskV.push_back(-1); + MaskV.push_back(DAG.getUNDEF(MVT::i16)); InOrder.set(i); } else if ((idx / 4) == BestHiQuad) { - MaskV.push_back((idx & 3) + 4); + MaskV.push_back(DAG.getConstant((idx & 3) + 4, MVT::i16)); InOrder.set(i); } else { - MaskV.push_back(-1); + MaskV.push_back(DAG.getUNDEF(MVT::i16)); } } - NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), - &MaskV[0]); + NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v8i16, NewV, + DAG.getUNDEF(MVT::v8i16), + DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::v8i16, &MaskV[0], 8)); } // In case BestHi & BestLo were both -1, which means each quadword has a word @@ -3567,12 +3811,11 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // 2. [ssse3] 2 x pshufb + 1 x por // 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw static -SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, X86TargetLowering &TLI) { - SDValue V1 = SVOp->getOperand(0); - SDValue V2 = SVOp->getOperand(1); - DebugLoc dl = SVOp->getDebugLoc(); - const int *Mask = SVOp->getMask(); +SDValue LowerVECTOR_SHUFFLEv16i8(SDValue V1, SDValue V2, + SDValue PermMask, SelectionDAG &DAG, + X86TargetLowering &TLI, DebugLoc dl) { + SmallVector MaskElts(PermMask.getNode()->op_begin(), + PermMask.getNode()->op_end()); SmallVector MaskVals; // If we have SSSE3, case 1 is generated when all result bytes come from @@ -3582,7 +3825,9 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, bool V1Only = true; bool V2Only = true; for (unsigned i = 0; i < 16; ++i) { - int EltIdx = Mask[i]; + SDValue Elt = MaskElts[i]; + int EltIdx = Elt.getOpcode() == ISD::UNDEF ? -1 : + cast(Elt)->getZExtValue(); MaskVals.push_back(EltIdx); if (EltIdx < 0) continue; @@ -3713,14 +3958,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, /// the right sequence. e.g. /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> static -SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, - TargetLowering &TLI, DebugLoc dl) { - MVT VT = SVOp->getValueType(0); - SDValue V1 = SVOp->getOperand(0); - SDValue V2 = SVOp->getOperand(1); - const int *PermMask = SVOp->getMask(); - unsigned NumElems = VT.getVectorNumElements(); +SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2, + MVT VT, + SDValue PermMask, SelectionDAG &DAG, + TargetLowering &TLI, DebugLoc dl) { + unsigned NumElems = PermMask.getNumOperands(); unsigned NewWidth = (NumElems == 4) ? 2 : 4; MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); MVT MaskEltVT = MaskVT.getVectorElementType(); @@ -3739,35 +3981,38 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, else NewVT = MVT::v2f64; } - int Scale = NumElems / NewWidth; - SmallVector MaskVec; + unsigned Scale = NumElems / NewWidth; + SmallVector MaskVec; for (unsigned i = 0; i < NumElems; i += Scale) { - int StartIdx = -1; - for (int j = 0; j < Scale; ++j) { - int EltIdx = PermMask[i+j]; - if (EltIdx < 0) + unsigned StartIdx = ~0U; + for (unsigned j = 0; j < Scale; ++j) { + SDValue Elt = PermMask.getOperand(i+j); + if (Elt.getOpcode() == ISD::UNDEF) continue; - if (StartIdx == -1) + unsigned EltIdx = cast(Elt)->getZExtValue(); + if (StartIdx == ~0U) StartIdx = EltIdx - (EltIdx % Scale); if (EltIdx != StartIdx + j) return SDValue(); } - if (StartIdx == -1) - MaskVec.push_back(-1); + if (StartIdx == ~0U) + MaskVec.push_back(DAG.getUNDEF(MaskEltVT)); else - MaskVec.push_back(StartIdx / Scale); + MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MaskEltVT)); } V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1); V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2); - return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NewVT, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskVec[0], MaskVec.size())); } /// getVZextMovL - Return a zero-extending vector move low node. /// static SDValue getVZextMovL(MVT VT, MVT OpVT, - SDValue SrcOp, SelectionDAG &DAG, - const X86Subtarget *Subtarget, DebugLoc dl) { + SDValue SrcOp, SelectionDAG &DAG, + const X86Subtarget *Subtarget, DebugLoc dl) { if (VT == MVT::v2f64 || VT == MVT::v4f32) { LoadSDNode *LD = NULL; if (!isScalarLoadToVector(SrcOp.getNode(), &LD)) @@ -3801,37 +4046,31 @@ static SDValue getVZextMovL(MVT VT, MVT OpVT, /// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of /// shuffles. static SDValue -LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - SDValue V1 = SVOp->getOperand(0); - SDValue V2 = SVOp->getOperand(1); - DebugLoc dl = SVOp->getDebugLoc(); - MVT VT = SVOp->getValueType(0); - const int *PermMaskPtr = SVOp->getMask(); - +LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, + SDValue PermMask, MVT VT, SelectionDAG &DAG, + DebugLoc dl) { + MVT MaskVT = PermMask.getValueType(); + MVT MaskEVT = MaskVT.getVectorElementType(); SmallVector, 8> Locs; Locs.resize(4); - SmallVector Mask1(4U, -1); - SmallVector PermMask; - - for (unsigned i = 0; i != 8; ++i) - PermMask.push_back(PermMaskPtr[i]); - + SmallVector Mask1(4, DAG.getUNDEF(MaskEVT)); unsigned NumHi = 0; unsigned NumLo = 0; for (unsigned i = 0; i != 4; ++i) { - int Idx = PermMask[i]; - if (Idx < 0) { + SDValue Elt = PermMask.getOperand(i); + if (Elt.getOpcode() == ISD::UNDEF) { Locs[i] = std::make_pair(-1, -1); } else { - assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!"); - if (Idx < 4) { + unsigned Val = cast(Elt)->getZExtValue(); + assert(Val < 8 && "Invalid VECTOR_SHUFFLE index!"); + if (Val < 4) { Locs[i] = std::make_pair(0, NumLo); - Mask1[NumLo] = Idx; + Mask1[NumLo] = Elt; NumLo++; } else { Locs[i] = std::make_pair(1, NumHi); if (2+NumHi < 4) - Mask1[2+NumHi] = Idx; + Mask1[2+NumHi] = Elt; NumHi++; } } @@ -3842,21 +4081,24 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { // implemented with two shuffles. First shuffle gather the elements. // The second shuffle, which takes the first shuffle as both of its // vector operands, put the elements into the right order. - V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); + V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &Mask1[0], Mask1.size())); - SmallVector Mask2(4U, -1); - + SmallVector Mask2(4, DAG.getUNDEF(MaskEVT)); for (unsigned i = 0; i != 4; ++i) { if (Locs[i].first == -1) continue; else { unsigned Idx = (i < 2) ? 0 : 4; Idx += Locs[i].first * 2 + Locs[i].second; - Mask2[i] = Idx; + Mask2[i] = DAG.getConstant(Idx, MaskEVT); } } - return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1, + DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &Mask2[0], Mask2.size())); } else if (NumLo == 3 || NumHi == 3) { // Otherwise, we must have three elements from one vector, call it X, and // one element from the other, call it Y. First, use a shufps to build an @@ -3867,51 +4109,60 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { // from X. if (NumHi == 3) { // Normalize it so the 3 elements come from V1. - CommuteVectorShuffleMask(PermMask, VT); + PermMask = CommuteVectorShuffleMask(PermMask, DAG, dl); std::swap(V1, V2); } // Find the element from V2. unsigned HiIndex; for (HiIndex = 0; HiIndex < 3; ++HiIndex) { - int Val = PermMask[HiIndex]; - if (Val < 0) + SDValue Elt = PermMask.getOperand(HiIndex); + if (Elt.getOpcode() == ISD::UNDEF) continue; + unsigned Val = cast(Elt)->getZExtValue(); if (Val >= 4) break; } - Mask1[0] = PermMask[HiIndex]; - Mask1[1] = -1; - Mask1[2] = PermMask[HiIndex^1]; - Mask1[3] = -1; - V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); + Mask1[0] = PermMask.getOperand(HiIndex); + Mask1[1] = DAG.getUNDEF(MaskEVT); + Mask1[2] = PermMask.getOperand(HiIndex^1); + Mask1[3] = DAG.getUNDEF(MaskEVT); + V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &Mask1[0], 4)); if (HiIndex >= 2) { - Mask1[0] = PermMask[0]; - Mask1[1] = PermMask[1]; - Mask1[2] = HiIndex & 1 ? 6 : 4; - Mask1[3] = HiIndex & 1 ? 4 : 6; - return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); + Mask1[0] = PermMask.getOperand(0); + Mask1[1] = PermMask.getOperand(1); + Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT); + Mask1[3] = DAG.getConstant(HiIndex & 1 ? 4 : 6, MaskEVT); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, dl, + MaskVT, &Mask1[0], 4)); } else { - Mask1[0] = HiIndex & 1 ? 2 : 0; - Mask1[1] = HiIndex & 1 ? 0 : 2; - Mask1[2] = PermMask[2]; - Mask1[3] = PermMask[3]; - if (Mask1[2] >= 0) - Mask1[2] += 4; - if (Mask1[3] >= 0) - Mask1[3] += 4; - return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]); + Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT); + Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT); + Mask1[2] = PermMask.getOperand(2); + Mask1[3] = PermMask.getOperand(3); + if (Mask1[2].getOpcode() != ISD::UNDEF) + Mask1[2] = + DAG.getConstant(cast(Mask1[2])->getZExtValue()+4, + MaskEVT); + if (Mask1[3].getOpcode() != ISD::UNDEF) + Mask1[3] = + DAG.getConstant(cast(Mask1[3])->getZExtValue()+4, + MaskEVT); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V2, V1, + DAG.getNode(ISD::BUILD_VECTOR, dl, + MaskVT, &Mask1[0], 4)); } } // Break it into (shuffle shuffle_hi, shuffle_lo). Locs.clear(); - SmallVector LoMask(4U, -1); - SmallVector HiMask(4U, -1); - - SmallVector *MaskPtr = &LoMask; + SmallVector LoMask(4, DAG.getUNDEF(MaskEVT)); + SmallVector HiMask(4, DAG.getUNDEF(MaskEVT)); + SmallVector *MaskPtr = &LoMask; unsigned MaskIdx = 0; unsigned LoIdx = 0; unsigned HiIdx = 2; @@ -3922,68 +4173,84 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { LoIdx = 0; HiIdx = 2; } - int Idx = PermMask[i]; - if (Idx < 0) { + SDValue Elt = PermMask.getOperand(i); + if (Elt.getOpcode() == ISD::UNDEF) { Locs[i] = std::make_pair(-1, -1); - } else if (Idx < 4) { + } else if (cast(Elt)->getZExtValue() < 4) { Locs[i] = std::make_pair(MaskIdx, LoIdx); - (*MaskPtr)[LoIdx] = Idx; + (*MaskPtr)[LoIdx] = Elt; LoIdx++; } else { Locs[i] = std::make_pair(MaskIdx, HiIdx); - (*MaskPtr)[HiIdx] = Idx; + (*MaskPtr)[HiIdx] = Elt; HiIdx++; } } - SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]); - SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]); - SmallVector MaskOps; + SDValue LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &LoMask[0], LoMask.size())); + SDValue HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &HiMask[0], HiMask.size())); + SmallVector MaskOps; for (unsigned i = 0; i != 4; ++i) { if (Locs[i].first == -1) { - MaskOps.push_back(-1); + MaskOps.push_back(DAG.getUNDEF(MaskEVT)); } else { unsigned Idx = Locs[i].first * 4 + Locs[i].second; - MaskOps.push_back(Idx); + MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); } } - return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, LoShuffle, HiShuffle, + DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &MaskOps[0], MaskOps.size())); } SDValue X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { - ShuffleVectorSDNode *SVOp = cast(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); + SDValue PermMask = Op.getOperand(2); MVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - const int *PermMask = cast(Op.getNode())->getMask(); - unsigned NumElems = VT.getVectorNumElements(); + unsigned NumElems = PermMask.getNumOperands(); bool isMMX = VT.getSizeInBits() == 64; bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; bool V2IsSplat = false; - if (isZeroShuffle(SVOp)) + // FIXME: Check for legal shuffle and return? + + if (isUndefShuffle(Op.getNode())) + return DAG.getUNDEF(VT); + + if (isZeroShuffle(Op.getNode())) return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl); - // Canonicalize movddup shuffles. - if (V2IsUndef && Subtarget->hasSSE2() && VT.getSizeInBits() == 128 && - X86::isMOVDDUPMask(SVOp)) - return CanonicalizeMovddup(SVOp, DAG, Subtarget->hasSSE3()); + if (isIdentityMask(PermMask.getNode())) + return V1; + else if (isIdentityMask(PermMask.getNode(), true)) + return V2; - // Promote splats to v4f32. - if (SVOp->isSplat()) { - if (isMMX || NumElems < 4) - return Op; - return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2()); + // Canonicalize movddup shuffles. + if (V2IsUndef && Subtarget->hasSSE2() && + VT.getSizeInBits() == 128 && + X86::isMOVDDUPMask(PermMask.getNode())) + return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3()); + + if (isSplatMask(PermMask.getNode())) { + if (isMMX || NumElems < 4) return Op; + // Promote it to a v4{if}32 splat. + return PromoteSplat(Op, DAG, Subtarget->hasSSE2()); } // If the shuffle can be profitably rewritten as a narrower shuffle, then // do it! if (VT == MVT::v8i16 || VT == MVT::v16i8) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); + SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, + *this, dl); if (NewOp.getNode()) return DAG.getNode(ISD::BIT_CONVERT, dl, VT, LowerVECTOR_SHUFFLE(NewOp, DAG)); @@ -3991,29 +4258,32 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // FIXME: Figure out a cleaner way to do this. // Try to make use of movq to zero out the top part. if (ISD::isBuildVectorAllZeros(V2.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, + DAG, *this, dl); if (NewOp.getNode()) { - if (isCommutedMOVL(cast(NewOp), true, false)) - return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0), - DAG, Subtarget, dl); + SDValue NewV1 = NewOp.getOperand(0); + SDValue NewV2 = NewOp.getOperand(1); + SDValue NewMask = NewOp.getOperand(2); + if (isCommutedMOVL(NewMask.getNode(), true, false)) { + NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG); + return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget, + dl); + } } } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); - if (NewOp.getNode() && X86::isMOVLMask(cast(NewOp))) + SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, + DAG, *this, dl); + if (NewOp.getNode() && X86::isMOVLMask(NewOp.getOperand(2).getNode())) return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1), - DAG, Subtarget, dl); + DAG, Subtarget, dl); } } - - if (X86::isPSHUFDMask(SVOp)) - return Op; - + // Check if this can be converted into a logical shift. bool isLeft = false; unsigned ShAmt = 0; SDValue ShVal; - bool isShift = getSubtarget()->hasSSE2() && - isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); + bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt); if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. @@ -4021,8 +4291,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { ShAmt *= EVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - - if (X86::isMOVLMask(SVOp)) { + + if (X86::isMOVLMask(PermMask.getNode())) { if (V1IsUndef) return V2; if (ISD::isBuildVectorAllZeros(V1.getNode())) @@ -4030,18 +4300,17 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (!isMMX) return Op; } - - // FIXME: fold these into legal mask. - if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || - X86::isMOVSLDUPMask(SVOp) || - X86::isMOVHLPSMask(SVOp) || - X86::isMOVHPMask(SVOp) || - X86::isMOVLPMask(SVOp))) + + if (!isMMX && (X86::isMOVSHDUPMask(PermMask.getNode()) || + X86::isMOVSLDUPMask(PermMask.getNode()) || + X86::isMOVHLPSMask(PermMask.getNode()) || + X86::isMOVHPMask(PermMask.getNode()) || + X86::isMOVLPMask(PermMask.getNode()))) return Op; - if (ShouldXformToMOVHLPS(SVOp) || - ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) - return CommuteVectorShuffle(SVOp, DAG); + if (ShouldXformToMOVHLPS(PermMask.getNode()) || + ShouldXformToMOVLP(V1.getNode(), V2.getNode(), PermMask.getNode())) + return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); if (isShift) { // No better options. Use a vshl / vsrl. @@ -4049,7 +4318,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { ShAmt *= EVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - + bool Commuted = false; // FIXME: This should also accept a bitcast of a splat? Be careful, not // 1,1,1,1 -> v8i16 though. @@ -4058,84 +4327,115 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // Canonicalize the splat or undef, if present, to be on the RHS. if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { - Op = CommuteVectorShuffle(SVOp, DAG); - SVOp = cast(Op); - V1 = SVOp->getOperand(0); - V2 = SVOp->getOperand(1); + Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); std::swap(V1IsSplat, V2IsSplat); std::swap(V1IsUndef, V2IsUndef); Commuted = true; } - if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) { - // Shuffling low element of v1 into undef, just return v1. - if (V2IsUndef) - return V1; - // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which - // the instruction selector will not match, so get a canonical MOVL with - // swapped operands to undo the commute. - return getMOVL(DAG, dl, VT, V2, V1); + // FIXME: Figure out a cleaner way to do this. + if (isCommutedMOVL(PermMask.getNode(), V2IsSplat, V2IsUndef)) { + if (V2IsUndef) return V1; + Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); + if (V2IsSplat) { + // V2 is a splat, so the mask may be malformed. That is, it may point + // to any V2 element. The instruction selectior won't like this. Get + // a corrected mask and commute to form a proper MOVS{S|D}. + SDValue NewMask = getMOVLMask(NumElems, DAG, dl); + if (NewMask.getNode() != PermMask.getNode()) + Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); + } + return Op; } - if (X86::isUNPCKL_v_undef_Mask(SVOp) || - X86::isUNPCKH_v_undef_Mask(SVOp) || - X86::isUNPCKLMask(SVOp) || - X86::isUNPCKHMask(SVOp)) + if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || + X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || + X86::isUNPCKLMask(PermMask.getNode()) || + X86::isUNPCKHMask(PermMask.getNode())) return Op; if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first // element then try to match unpck{h|l} again. If match, return a // new vector_shuffle with the corrected mask. - SDValue NewMask = NormalizeMask(SVOp, DAG); - ShuffleVectorSDNode *NSVOp = cast(NewMask); - if (NSVOp != SVOp) { - if (X86::isUNPCKLMask(NSVOp, true)) { - return NewMask; - } else if (X86::isUNPCKHMask(NSVOp, true)) { - return NewMask; + SDValue NewMask = NormalizeMask(PermMask, DAG); + if (NewMask.getNode() != PermMask.getNode()) { + if (X86::isUNPCKLMask(NewMask.getNode(), true)) { + SDValue NewMask = getUnpacklMask(NumElems, DAG, dl); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); + } else if (X86::isUNPCKHMask(NewMask.getNode(), true)) { + SDValue NewMask = getUnpackhMask(NumElems, DAG, dl); + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask); } } } + // Normalize the node to match x86 shuffle ops if needed + if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.getNode())) + Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); + if (Commuted) { // Commute is back and try unpck* again. - // FIXME: this seems wrong. - SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); - ShuffleVectorSDNode *NewSVOp = cast(NewOp); - if (X86::isUNPCKL_v_undef_Mask(NewSVOp) || - X86::isUNPCKH_v_undef_Mask(NewSVOp) || - X86::isUNPCKLMask(NewSVOp) || - X86::isUNPCKHMask(NewSVOp)) - return NewOp; + Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); + if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || + X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || + X86::isUNPCKLMask(PermMask.getNode()) || + X86::isUNPCKHMask(PermMask.getNode())) + return Op; } // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle. - - // Normalize the node to match x86 shuffle ops if needed - if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) - return CommuteVectorShuffle(SVOp, DAG); - - // Check for legal shuffle and return? - if (isShuffleMaskLegal(PermMask, VT)) + // Try PSHUF* first, then SHUFP*. + // MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically + // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented. + if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.getNode())) { + if (V2.getOpcode() != ISD::UNDEF) + return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, + DAG.getUNDEF(VT), PermMask); return Op; - + } + + if (!isMMX) { + if (Subtarget->hasSSE2() && + (X86::isPSHUFDMask(PermMask.getNode()) || + X86::isPSHUFHWMask(PermMask.getNode()) || + X86::isPSHUFLWMask(PermMask.getNode()))) { + MVT RVT = VT; + if (VT == MVT::v4f32) { + RVT = MVT::v4i32; + Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT, + DAG.getNode(ISD::BIT_CONVERT, dl, RVT, V1), + DAG.getUNDEF(RVT), PermMask); + } else if (V2.getOpcode() != ISD::UNDEF) + Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT, V1, + DAG.getUNDEF(RVT), PermMask); + if (RVT != VT) + Op = DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op); + return Op; + } + + // Binary or unary shufps. + if (X86::isSHUFPMask(PermMask.getNode()) || + (V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.getNode()))) + return Op; + } + // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { - SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this); + SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this, dl); if (NewOp.getNode()) return NewOp; } if (VT == MVT::v16i8) { - SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this); + SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(V1, V2, PermMask, DAG, *this, dl); if (NewOp.getNode()) return NewOp; } // Handle all 4 wide cases with a number of shuffles except for MMX. if (NumElems == 4 && !isMMX) - return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG); + return LowerVECTOR_SHUFFLE_4wide(V1, V2, PermMask, VT, DAG, dl); return SDValue(); } @@ -4229,12 +4529,22 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); if (Idx == 0) return Op; - // SHUFPS the element to the lowest double word, then movss. - int Mask[4] = { Idx, -1, -1, -1 }; - MVT VVT = Op.getOperand(0).getValueType(); - SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), - DAG.getUNDEF(VVT), Mask); + MVT MaskVT = MVT::getIntVectorWithNumElements(4); + SmallVector IdxVec; + IdxVec. + push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType())); + IdxVec. + push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); + IdxVec. + push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); + IdxVec. + push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &IdxVec[0], IdxVec.size()); + SDValue Vec = Op.getOperand(0); + Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(), + Vec, DAG.getUNDEF(Vec.getValueType()), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); } else if (VT.getSizeInBits() == 64) { @@ -4248,10 +4558,17 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // UNPCKHPD the element to the lowest double word, then movsd. // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. - int Mask[2] = { 1, -1 }; - MVT VVT = Op.getOperand(0).getValueType(); - SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), - DAG.getUNDEF(VVT), Mask); + MVT MaskVT = MVT::getIntVectorWithNumElements(2); + SmallVector IdxVec; + IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType())); + IdxVec. + push_back(DAG.getUNDEF(MaskVT.getVectorElementType())); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, + &IdxVec[0], IdxVec.size()); + SDValue Vec = Op.getOperand(0); + Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(), + Vec, DAG.getUNDEF(Vec.getValueType()), + Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); } @@ -4758,6 +5075,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); + SmallVector MaskVec; + MaskVec.push_back(DAG.getConstant(0, MVT::i32)); + MaskVec.push_back(DAG.getConstant(4, MVT::i32)); + MaskVec.push_back(DAG.getConstant(1, MVT::i32)); + MaskVec.push_back(DAG.getConstant(5, MVT::i32)); + SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &MaskVec[0], MaskVec.size()); + SmallVector MaskVec2; + MaskVec2.push_back(DAG.getConstant(1, MVT::i32)); + MaskVec2.push_back(DAG.getConstant(0, MVT::i32)); + SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, + &MaskVec2[0], MaskVec2.size()); + SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(0), @@ -4766,11 +5096,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(0), DAG.getIntPtrConstant(0))); - SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); + SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v4i32, + XR1, XR2, UnpcklMask); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, PseudoSourceValue::getConstantPool(), 0, false, 16); - SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); + SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v4i32, + Unpck1, CLod0, UnpcklMask); SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, PseudoSourceValue::getConstantPool(), 0, @@ -4778,9 +5110,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); // Add the halves; easiest way is to swap them into another reg first. - int ShufMask[2] = { 1, -1 }; - SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, - DAG.getUNDEF(MVT::v2f64), ShufMask); + SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v2f64, + Sub, Sub, ShufMask); SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add, DAG.getIntPtrConstant(0)); @@ -6932,36 +7263,34 @@ bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const { /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. bool -X86TargetLowering::isShuffleMaskLegal(const int *Mask, MVT VT) const { +X86TargetLowering::isShuffleMaskLegal(SDValue Mask, MVT VT) const { // Only do shuffles on 128-bit vector types for now. - if (VT.getSizeInBits() == 64) - return false; - - // FIXME: pshufb, blends, palignr, shifts. - return (VT.getVectorNumElements() == 2 || - ShuffleVectorSDNode::isSplatMask(Mask, VT) || - isMOVLMask(Mask, VT) || - isSHUFPMask(Mask, VT) || - isPSHUFDMask(Mask, VT) || - isPSHUFHWMask(Mask, VT) || - isPSHUFLWMask(Mask, VT) || - isUNPCKLMask(Mask, VT) || - isUNPCKHMask(Mask, VT) || - isUNPCKL_v_undef_Mask(Mask, VT) || - isUNPCKH_v_undef_Mask(Mask, VT)); + // FIXME: pshufb, blends + if (VT.getSizeInBits() == 64) return false; + return (Mask.getNode()->getNumOperands() <= 4 || + isIdentityMask(Mask.getNode()) || + isIdentityMask(Mask.getNode(), true) || + isSplatMask(Mask.getNode()) || + X86::isPSHUFHWMask(Mask.getNode()) || + X86::isPSHUFLWMask(Mask.getNode()) || + X86::isUNPCKLMask(Mask.getNode()) || + X86::isUNPCKHMask(Mask.getNode()) || + X86::isUNPCKL_v_undef_Mask(Mask.getNode()) || + X86::isUNPCKH_v_undef_Mask(Mask.getNode())); } bool -X86TargetLowering::isVectorClearMaskLegal(const int *Mask, MVT VT) const { - unsigned NumElts = VT.getVectorNumElements(); - // FIXME: This collection of masks seems suspect. - if (NumElts == 2) - return true; - if (NumElts == 4 && VT.getSizeInBits() == 128) { - return (isMOVLMask(Mask, VT) || - isCommutedMOVLMask(Mask, VT, true) || - isSHUFPMask(Mask, VT) || - isCommutedSHUFPMask(Mask, VT)); +X86TargetLowering::isVectorClearMaskLegal(const std::vector &BVOps, + MVT EVT, SelectionDAG &DAG) const { + unsigned NumElts = BVOps.size(); + // Only do shuffles on 128-bit vector types for now. + if (EVT.getSizeInBits() * NumElts == 64) return false; + if (NumElts == 2) return true; + if (NumElts == 4) { + return (isMOVLMask(&BVOps[0], 4) || + isCommutedMOVL(&BVOps[0], 4, true) || + isSHUFPMask(&BVOps[0], 4) || + isCommutedSHUFP(&BVOps[0], 4)); } return false; } @@ -7696,14 +8025,15 @@ static bool isBaseAlignmentOfN(unsigned N, SDNode *Base, return false; } -static bool EltsFromConsecutiveLoads(SDNode *N, const int *PermMask, +static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask, unsigned NumElems, MVT EVT, SDNode *&Base, SelectionDAG &DAG, MachineFrameInfo *MFI, const TargetLowering &TLI) { Base = NULL; for (unsigned i = 0; i < NumElems; ++i) { - if (PermMask[i] < 0) { + SDValue Idx = PermMask.getOperand(i); + if (Idx.getOpcode() == ISD::UNDEF) { if (!Base) return false; continue; @@ -7736,12 +8066,12 @@ static bool EltsFromConsecutiveLoads(SDNode *N, const int *PermMask, /// shuffle to be an appropriate build vector so it can take advantage of // performBuildVectorCombine. static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI) { DebugLoc dl = N->getDebugLoc(); MVT VT = N->getValueType(0); MVT EVT = VT.getVectorElementType(); - const int *PermMask = cast(N)->getMask(); - unsigned NumElems = VT.getVectorNumElements(); + SDValue PermMask = N->getOperand(2); + unsigned NumElems = PermMask.getNumOperands(); // For x86-32 machines, if we see an insert and then a shuffle in a v2i64 // where the upper half is 0, it is advantageous to rewrite it as a build @@ -7750,10 +8080,9 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, SDValue In[2]; In[0] = N->getOperand(0); In[1] = N->getOperand(1); - unsigned Idx0 = PermMask[0]; - unsigned Idx1 = PermMask[1]; - // FIXME: can we take advantage of undef index? - if (PermMask[0] >= 0 && PermMask[1] >= 0 && + unsigned Idx0 =cast(PermMask.getOperand(0))->getZExtValue(); + unsigned Idx1 =cast(PermMask.getOperand(1))->getZExtValue(); + if (In[0].getValueType().getVectorNumElements() == NumElems && In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT && In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) { ConstantSDNode* InsertVecIdx = @@ -8217,9 +8546,9 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } } } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE && - cast(ShAmtOp)->isSplat()) { - BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, - DAG.getIntPtrConstant(0)); + isSplatMask(ShAmtOp.getOperand(2).getNode())) { + BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, + DAG.getIntPtrConstant(0)); } else return SDValue(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index d91951c62e6..050b86979c5 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -230,8 +230,7 @@ namespace llvm { // VSHL, VSRL - Vector logical left / right shift. VSHL, VSRL, - - // CMPPD, CMPPS - Vector double/float comparison. + // CMPPD, CMPPS - Vector double/float comparison. CMPPD, CMPPS, @@ -252,72 +251,80 @@ namespace llvm { namespace X86 { /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to PSHUFD. - bool isPSHUFDMask(ShuffleVectorSDNode *N); + bool isPSHUFDMask(SDNode *N); /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to PSHUFD. - bool isPSHUFHWMask(ShuffleVectorSDNode *N); + bool isPSHUFHWMask(SDNode *N); /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to PSHUFD. - bool isPSHUFLWMask(ShuffleVectorSDNode *N); + bool isPSHUFLWMask(SDNode *N); /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to SHUFP*. - bool isSHUFPMask(ShuffleVectorSDNode *N); + bool isSHUFPMask(SDNode *N); /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. - bool isMOVHLPSMask(ShuffleVectorSDNode *N); + bool isMOVHLPSMask(SDNode *N); /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, /// <2, 3, 2, 3> - bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N); + bool isMOVHLPS_v_undef_Mask(SDNode *N); /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for MOVLP{S|D}. - bool isMOVLPMask(ShuffleVectorSDNode *N); + /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. + bool isMOVLPMask(SDNode *N); /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for MOVHP{S|D}. + /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} /// as well as MOVLHPS. - bool isMOVHPMask(ShuffleVectorSDNode *N); + bool isMOVHPMask(SDNode *N); /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. - bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); + bool isUNPCKLMask(SDNode *N, bool V2IsSplat = false); /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. - bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); + bool isUNPCKHMask(SDNode *N, bool V2IsSplat = false); /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, /// <0, 0, 1, 1> - bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N); + bool isUNPCKL_v_undef_Mask(SDNode *N); /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, /// <2, 2, 3, 3> - bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N); + bool isUNPCKH_v_undef_Mask(SDNode *N); /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. - bool isMOVLMask(ShuffleVectorSDNode *N); + bool isMOVLMask(SDNode *N); /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. - bool isMOVSHDUPMask(ShuffleVectorSDNode *N); + bool isMOVSHDUPMask(SDNode *N); /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. - bool isMOVSLDUPMask(ShuffleVectorSDNode *N); + bool isMOVSLDUPMask(SDNode *N); + + /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a splat of a single element. + bool isSplatMask(SDNode *N); + + /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a splat of zero element. + bool isSplatLoMask(SDNode *N); /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVDDUP. - bool isMOVDDUPMask(ShuffleVectorSDNode *N); + bool isMOVDDUPMask(SDNode *N); /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* @@ -470,13 +477,14 @@ namespace llvm { /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask /// values are assumed to be legal. - virtual bool isShuffleMaskLegal(const int *Mask, MVT VT) const; + virtual bool isShuffleMaskLegal(SDValue Mask, MVT VT) const; /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is /// used by Targets can use this to indicate if there is a suitable /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant /// pool entry. - virtual bool isVectorClearMaskLegal(const int *Mask, MVT VT) const; + virtual bool isVectorClearMaskLegal(const std::vector &BVOps, + MVT EVT, SelectionDAG &DAG) const; /// ShouldShrinkFPConstant - If true, then instruction selection should /// seek to shrink the FP constant of the specified type to a smaller type diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 511d42abc10..462433bb27b 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -3801,7 +3801,6 @@ def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst), (implicit EFLAGS)), (DEC32m addr:$dst)>, Requires<[In32BitMode]>; - //===----------------------------------------------------------------------===// // Floating Point Stack Support //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 338b9e294b4..71f2cb164d5 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -30,37 +30,33 @@ def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>; // MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to // PSHUFW imm. -def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm; // Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> -def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast(N)); +def MMX_UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKHMask(N); }]>; // Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> -def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast(N)); +def MMX_UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKLMask(N); }]>; // Patterns for: vector_shuffle v1, , <0, 0, 1, 1, ...> -def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast(N)); +def MMX_UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKH_v_undef_Mask(N); }]>; // Patterns for: vector_shuffle v1, , <2, 2, 3, 3, ...> -def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKL_v_undef_Mask(cast(N)); +def MMX_UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKL_v_undef_Mask(N); }]>; -def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFDMask(cast(N)); +// Patterns for shuffling. +def MMX_PSHUFW_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isPSHUFDMask(N); }], MMX_SHUFFLE_get_shuf_imm>; //===----------------------------------------------------------------------===// @@ -189,8 +185,9 @@ def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src), def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (movl immAllZerosV, - (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>; + (v2i64 (vector_shuffle immAllZerosV, + (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src)))), + MOVL_shuffle_mask)))]>; let neverHasSideEffects = 1 in def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src), @@ -322,74 +319,86 @@ let isTwoAddress = 1 in { (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; + (v8i8 (vector_shuffle VR64:$src1, VR64:$src2, + MMX_UNPCKH_shuffle_mask)))]>; def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (mmx_unpckh VR64:$src1, - (bc_v8i8 (load_mmx addr:$src2)))))]>; + (v8i8 (vector_shuffle VR64:$src1, + (bc_v8i8 (load_mmx addr:$src2)), + MMX_UNPCKH_shuffle_mask)))]>; def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; + (v4i16 (vector_shuffle VR64:$src1, VR64:$src2, + MMX_UNPCKH_shuffle_mask)))]>; def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (mmx_unpckh VR64:$src1, - (bc_v4i16 (load_mmx addr:$src2)))))]>; + (v4i16 (vector_shuffle VR64:$src1, + (bc_v4i16 (load_mmx addr:$src2)), + MMX_UNPCKH_shuffle_mask)))]>; def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; + (v2i32 (vector_shuffle VR64:$src1, VR64:$src2, + MMX_UNPCKH_shuffle_mask)))]>; def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (mmx_unpckh VR64:$src1, - (bc_v2i32 (load_mmx addr:$src2)))))]>; + (v2i32 (vector_shuffle VR64:$src1, + (bc_v2i32 (load_mmx addr:$src2)), + MMX_UNPCKH_shuffle_mask)))]>; // Unpack Low Packed Data Instructions def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; + (v8i8 (vector_shuffle VR64:$src1, VR64:$src2, + MMX_UNPCKL_shuffle_mask)))]>; def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v8i8 (mmx_unpckl VR64:$src1, - (bc_v8i8 (load_mmx addr:$src2)))))]>; + (v8i8 (vector_shuffle VR64:$src1, + (bc_v8i8 (load_mmx addr:$src2)), + MMX_UNPCKL_shuffle_mask)))]>; def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; + (v4i16 (vector_shuffle VR64:$src1, VR64:$src2, + MMX_UNPCKL_shuffle_mask)))]>; def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v4i16 (mmx_unpckl VR64:$src1, - (bc_v4i16 (load_mmx addr:$src2)))))]>; + (v4i16 (vector_shuffle VR64:$src1, + (bc_v4i16 (load_mmx addr:$src2)), + MMX_UNPCKL_shuffle_mask)))]>; def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; + (v2i32 (vector_shuffle VR64:$src1, VR64:$src2, + MMX_UNPCKL_shuffle_mask)))]>; def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (mmx_unpckl VR64:$src1, - (bc_v2i32 (load_mmx addr:$src2)))))]>; + (v2i32 (vector_shuffle VR64:$src1, + (bc_v2i32 (load_mmx addr:$src2)), + MMX_UNPCKL_shuffle_mask)))]>; } // -- Pack Instructions @@ -402,13 +411,17 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>; + (v4i16 (vector_shuffle + VR64:$src1, (undef), + MMX_PSHUFW_shuffle_mask:$src2)))]>; def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)), - (undef)))]>; + (v4i16 (vector_shuffle + (bc_v4i16 (load_mmx addr:$src1)), + (undef), + MMX_PSHUFW_shuffle_mask:$src2)))]>; // -- Conversion Instructions let neverHasSideEffects = 1 in { @@ -614,27 +627,34 @@ def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))), // Patterns to perform canonical versions of vector shuffling. let AddedComplexity = 10 in { - def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))), + def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef), + MMX_UNPCKL_v_undef_shuffle_mask)), (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>; - def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))), + def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef), + MMX_UNPCKL_v_undef_shuffle_mask)), (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>; - def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))), + def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef), + MMX_UNPCKL_v_undef_shuffle_mask)), (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>; } let AddedComplexity = 10 in { - def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))), + def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef), + MMX_UNPCKH_v_undef_shuffle_mask)), (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>; - def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))), + def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef), + MMX_UNPCKH_v_undef_shuffle_mask)), (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>; - def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))), + def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef), + MMX_UNPCKH_v_undef_shuffle_mask)), (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>; } // Patterns to perform vector shuffling with a zeroed out vector. let AddedComplexity = 20 in { - def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV, - (v2i32 (scalar_to_vector (load_mmx addr:$src))))), + def : Pat<(bc_v2i32 (vector_shuffle immAllZerosV, + (v2i32 (scalar_to_vector (load_mmx addr:$src))), + MMX_UNPCKL_shuffle_mask)), (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>; } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a10f4433e80..3ce35bd6255 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -175,108 +175,103 @@ def PSxLDQ_imm : SDNodeXForm; // SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to // PSHUFHW imm. -def SHUFFLE_get_pshufhw_imm : SDNodeXForm; // SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to // PSHUFLW imm. -def SHUFFLE_get_pshuflw_imm : SDNodeXForm; -def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - ShuffleVectorSDNode *SVOp = cast(N); - return SVOp->isSplat() && SVOp->getSplatIndex() == 0; -}]>; - -def movddup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVDDUPMask(cast(N)); -}]>; - -def movhlps : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHLPSMask(cast(N)); -}]>; - -def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHLPS_v_undef_Mask(cast(N)); -}]>; - -def movhp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHPMask(cast(N)); -}]>; - -def movlp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVLPMask(cast(N)); -}]>; - -def movl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVLMask(cast(N)); -}]>; - -def movshdup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVSHDUPMask(cast(N)); -}]>; - -def movsldup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVSLDUPMask(cast(N)); -}]>; - -def unpckl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast(N)); -}]>; - -def unpckh : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast(N)); -}]>; - -def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKL_v_undef_Mask(cast(N)); -}]>; - -def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast(N)); -}]>; - -def pshufd : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFDMask(cast(N)); +def SSE_splat_mask : PatLeaf<(build_vector), [{ + return X86::isSplatMask(N); }], SHUFFLE_get_shuf_imm>; -def shufp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isSHUFPMask(cast(N)); +def SSE_splat_lo_mask : PatLeaf<(build_vector), [{ + return X86::isSplatLoMask(N); +}]>; + +def MOVDDUP_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVDDUPMask(N); +}]>; + +def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVHLPSMask(N); +}]>; + +def MOVHLPS_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVHLPS_v_undef_Mask(N); +}]>; + +def MOVHP_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVHPMask(N); +}]>; + +def MOVLP_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVLPMask(N); +}]>; + +def MOVL_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVLMask(N); +}]>; + +def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVSHDUPMask(N); +}]>; + +def MOVSLDUP_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVSLDUPMask(N); +}]>; + +def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKLMask(N); +}]>; + +def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKHMask(N); +}]>; + +def UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKL_v_undef_Mask(N); +}]>; + +def UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKH_v_undef_Mask(N); +}]>; + +def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isPSHUFDMask(N); }], SHUFFLE_get_shuf_imm>; -def pshufhw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFHWMask(cast(N)); +def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isPSHUFHWMask(N); }], SHUFFLE_get_pshufhw_imm>; -def pshuflw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFLWMask(cast(N)); +def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isPSHUFLWMask(N); }], SHUFFLE_get_pshuflw_imm>; +def SHUFP_unary_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isPSHUFDMask(N); +}], SHUFFLE_get_shuf_imm>; + +def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isSHUFPMask(N); +}], SHUFFLE_get_shuf_imm>; + +def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isSHUFPMask(N); +}], SHUFFLE_get_shuf_imm>; + + //===----------------------------------------------------------------------===// // SSE scalar FP Instructions //===----------------------------------------------------------------------===// @@ -709,14 +704,16 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (movlp VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; + (v4f32 (vector_shuffle VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))), + MOVLP_shuffle_mask)))]>; def MOVHPSrm : PSI<0x16, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (movhp VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; + (v4f32 (vector_shuffle VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))), + MOVHP_shuffle_mask)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -731,25 +728,29 @@ def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract - (unpckh (bc_v2f64 (v4f32 VR128:$src)), - (undef)), (iPTR 0))), addr:$dst)]>; + (v2f64 (vector_shuffle + (bc_v2f64 (v4f32 VR128:$src)), (undef), + UNPCKH_shuffle_mask)), (iPTR 0))), + addr:$dst)]>; let Constraints = "$src1 = $dst" in { let AddedComplexity = 20 in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (movhp VR128:$src1, VR128:$src2)))]>; + (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVHP_shuffle_mask)))]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>; + (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVHLPS_shuffle_mask)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" let AddedComplexity = 20 in -def : Pat<(v4f32 (movddup VR128:$src, (undef))), +def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), MOVDDUP_shuffle_mask)), (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; @@ -907,41 +908,51 @@ let Constraints = "$src1 = $dst" in { let isConvertibleToThreeAddress = 1 in // Convert to pshufd def SHUFPSrri : PSIi8<0xC6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, - VR128:$src2, i8imm:$src3), + VR128:$src2, i32i8imm:$src3), "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; + (v4f32 (vector_shuffle + VR128:$src1, VR128:$src2, + SHUFP_shuffle_mask:$src3)))]>; def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, - f128mem:$src2, i8imm:$src3), + f128mem:$src2, i32i8imm:$src3), "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (v4f32 (shufp:$src3 - VR128:$src1, (memopv4f32 addr:$src2))))]>; + (v4f32 (vector_shuffle + VR128:$src1, (memopv4f32 addr:$src2), + SHUFP_shuffle_mask:$src3)))]>; let AddedComplexity = 10 in { def UNPCKHPSrr : PSI<0x15, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpckhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>; + (v4f32 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def UNPCKHPSrm : PSI<0x15, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpckhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (unpckh VR128:$src1, - (memopv4f32 addr:$src2))))]>; + (v4f32 (vector_shuffle + VR128:$src1, (memopv4f32 addr:$src2), + UNPCKH_shuffle_mask)))]>; def UNPCKLPSrr : PSI<0x14, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpcklps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>; + (v4f32 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def UNPCKLPSrm : PSI<0x14, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpcklps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>; + (v4f32 (vector_shuffle + VR128:$src1, (memopv4f32 addr:$src2), + UNPCKL_shuffle_mask)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -1033,7 +1044,8 @@ let neverHasSideEffects = 1 in (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movss\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (movl VR128:$src1, VR128:$src2)))]>; + (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVL_shuffle_mask)))]>; } // Move to lower bits of a VR128 and zeroing upper bits. @@ -1439,14 +1451,16 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movlpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (movlp VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))))]>; + (v2f64 (vector_shuffle VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)), + MOVLP_shuffle_mask)))]>; def MOVHPDrm : PDI<0x16, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (movhp VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))))]>; + (v2f64 (vector_shuffle VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)), + MOVHP_shuffle_mask)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -1460,8 +1474,9 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhpd\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract - (v2f64 (unpckh VR128:$src, (undef))), - (iPTR 0))), addr:$dst)]>; + (v2f64 (vector_shuffle VR128:$src, (undef), + UNPCKH_shuffle_mask)), (iPTR 0))), + addr:$dst)]>; // SSE2 instructions without OpSize prefix def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -1729,39 +1744,48 @@ let Constraints = "$src1 = $dst" in { def SHUFPDrri : PDIi8<0xC6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, - (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; + [(set VR128:$dst, (v2f64 (vector_shuffle + VR128:$src1, VR128:$src2, + SHUFP_shuffle_mask:$src3)))]>; def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, i8imm:$src3), "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (v2f64 (shufp:$src3 - VR128:$src1, (memopv2f64 addr:$src2))))]>; + (v2f64 (vector_shuffle + VR128:$src1, (memopv2f64 addr:$src2), + SHUFP_shuffle_mask:$src3)))]>; let AddedComplexity = 10 in { def UNPCKHPDrr : PDI<0x15, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpckhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>; + (v2f64 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def UNPCKHPDrm : PDI<0x15, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpckhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (unpckh VR128:$src1, - (memopv2f64 addr:$src2))))]>; + (v2f64 (vector_shuffle + VR128:$src1, (memopv2f64 addr:$src2), + UNPCKH_shuffle_mask)))]>; def UNPCKLPDrr : PDI<0x14, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpcklpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>; + (v2f64 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def UNPCKLPDrm : PDI<0x14, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpcklpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>; + (v2f64 (vector_shuffle + VR128:$src1, (memopv2f64 addr:$src2), + UNPCKL_shuffle_mask)))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -2019,43 +2043,49 @@ defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>; def PSHUFDri : PDIi8<0x70, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v4i32 (pshufd:$src2 - VR128:$src1, (undef))))]>; + [(set VR128:$dst, (v4i32 (vector_shuffle + VR128:$src1, (undef), + PSHUFD_shuffle_mask:$src2)))]>; def PSHUFDmi : PDIi8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v4i32 (pshufd:$src2 + [(set VR128:$dst, (v4i32 (vector_shuffle (bc_v4i32(memopv2i64 addr:$src1)), - (undef))))]>; + (undef), + PSHUFD_shuffle_mask:$src2)))]>; // SSE2 with ImmT == Imm8 and XS prefix. def PSHUFHWri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1, - (undef))))]>, + [(set VR128:$dst, (v8i16 (vector_shuffle + VR128:$src1, (undef), + PSHUFHW_shuffle_mask:$src2)))]>, XS, Requires<[HasSSE2]>; def PSHUFHWmi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshufhw:$src2 - (bc_v8i16 (memopv2i64 addr:$src1)), - (undef))))]>, + [(set VR128:$dst, (v8i16 (vector_shuffle + (bc_v8i16 (memopv2i64 addr:$src1)), + (undef), + PSHUFHW_shuffle_mask:$src2)))]>, XS, Requires<[HasSSE2]>; // SSE2 with ImmT == Imm8 and XD prefix. def PSHUFLWri : Ii8<0x70, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1, - (undef))))]>, + [(set VR128:$dst, (v8i16 (vector_shuffle + VR128:$src1, (undef), + PSHUFLW_shuffle_mask:$src2)))]>, XD, Requires<[HasSSE2]>; def PSHUFLWmi : Ii8<0x70, MRMSrcMem, - (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), + (outs VR128:$dst), (ins i128mem:$src1, i32i8imm:$src2), "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshuflw:$src2 - (bc_v8i16 (memopv2i64 addr:$src1)), - (undef))))]>, + [(set VR128:$dst, (v8i16 (vector_shuffle + (bc_v8i16 (memopv2i64 addr:$src1)), + (undef), + PSHUFLW_shuffle_mask:$src2)))]>, XD, Requires<[HasSSE2]>; @@ -2064,91 +2094,107 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>; + (v16i8 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (unpckl VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2))))]>; + (v16i8 (vector_shuffle VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)), + UNPCKL_shuffle_mask)))]>; def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>; + (v8i16 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (unpckl VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2))))]>; + (v8i16 (vector_shuffle VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)), + UNPCKL_shuffle_mask)))]>; def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>; + (v4i32 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (unpckl VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2))))]>; + (v4i32 (vector_shuffle VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), + UNPCKL_shuffle_mask)))]>; def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>; + (v2i64 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckl VR128:$src1, - (memopv2i64 addr:$src2))))]>; + (v2i64 (vector_shuffle VR128:$src1, + (memopv2i64 addr:$src2), + UNPCKL_shuffle_mask)))]>; def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>; + (v16i8 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckh VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2))))]>; + [(set VR128:$dst, + (v16i8 (vector_shuffle VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)), + UNPCKH_shuffle_mask)))]>; def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>; + (v8i16 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (unpckh VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2))))]>; + (v8i16 (vector_shuffle VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)), + UNPCKH_shuffle_mask)))]>; def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>; + (v4i32 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (unpckh VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2))))]>; + (v4i32 (vector_shuffle VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), + UNPCKH_shuffle_mask)))]>; def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>; + (v2i64 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckh VR128:$src1, - (memopv2i64 addr:$src2))))]>; + (v2i64 (vector_shuffle VR128:$src1, + (memopv2i64 addr:$src2), + UNPCKH_shuffle_mask)))]>; } // Extract / Insert @@ -2311,7 +2357,8 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movsd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (movl VR128:$src1, VR128:$src2)))]>; + (v2f64 (vector_shuffle VR128:$src1, VR128:$src2, + MOVL_shuffle_mask)))]>; } // Store / copy lower 64-bits of a XMM register. @@ -2402,35 +2449,44 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), // Move Instructions def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movshdup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (movshdup - VR128:$src, (undef))))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src, (undef), + MOVSHDUP_shuffle_mask)))]>; def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movshdup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (movshdup - (memopv4f32 addr:$src), (undef)))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + (memopv4f32 addr:$src), (undef), + MOVSHDUP_shuffle_mask)))]>; def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movsldup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (movsldup - VR128:$src, (undef))))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src, (undef), + MOVSLDUP_shuffle_mask)))]>; def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movsldup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (movsldup - (memopv4f32 addr:$src), (undef)))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + (memopv4f32 addr:$src), (undef), + MOVSLDUP_shuffle_mask)))]>; def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movddup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>; + [(set VR128:$dst, + (v2f64 (vector_shuffle VR128:$src, (undef), + MOVDDUP_shuffle_mask)))]>; def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "movddup\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)), - (undef))))]>; + (v2f64 (vector_shuffle + (scalar_to_vector (loadf64 addr:$src)), + (undef), MOVDDUP_shuffle_mask)))]>; -def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), - (undef)), +def : Pat<(vector_shuffle + (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), + (undef), MOVDDUP_shuffle_mask), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; -def : Pat<(movddup (memopv2f64 addr:$src), (undef)), +def : Pat<(vector_shuffle + (memopv2f64 addr:$src), (undef), MOVDDUP_shuffle_mask), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; @@ -2499,18 +2555,22 @@ def MWAIT : I<0xC9, RawFrm, (outs), (ins), "mwait", // vector_shuffle v1, <1, 1, 3, 3> let AddedComplexity = 15 in -def : Pat<(v4i32 (movshdup VR128:$src, (undef))), +def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), + MOVSHDUP_shuffle_mask)), (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>; let AddedComplexity = 20 in -def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), +def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef), + MOVSHDUP_shuffle_mask)), (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>; // vector_shuffle v1, <0, 0, 2, 2> let AddedComplexity = 15 in - def : Pat<(v4i32 (movsldup VR128:$src, (undef))), + def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), + MOVSLDUP_shuffle_mask)), (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>; let AddedComplexity = 20 in - def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), + def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef), + MOVSLDUP_shuffle_mask)), (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; //===----------------------------------------------------------------------===// @@ -2851,173 +2911,207 @@ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), // Splat v2f64 / v2i64 let AddedComplexity = 10 in { -def : Pat<(splat_lo (v2f64 VR128:$src), (undef)), +def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_lo_mask:$sm), (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(unpckh (v2f64 VR128:$src), (undef)), +def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm), (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), +def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_lo_mask:$sm), (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(unpckh (v2i64 VR128:$src), (undef)), +def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm), (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; } // Special unary SHUFPSrri case. -def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPSrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, +def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef), + SHUFP_unary_shuffle_mask:$sm)), + (SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>, Requires<[HasSSE1]>; -let AddedComplexity = 5 in -def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), - (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, - Requires<[HasSSE2]>; // Special unary SHUFPDrri case. -def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE2]>; -// Special unary SHUFPDrri case. -def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, +def : Pat<(v2f64 (vector_shuffle VR128:$src1, (undef), + SHUFP_unary_shuffle_mask:$sm)), + (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>, Requires<[HasSSE2]>; // Unary v4f32 shuffle with PSHUF* in order to fold a load. -def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), - (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, +def : Pat<(vector_shuffle (bc_v4i32 (memopv4f32 addr:$src1)), (undef), + SHUFP_unary_shuffle_mask:$sm), + (PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>, Requires<[HasSSE2]>; // Special binary v4i32 shuffle cases with SHUFPS. -def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), - (SHUFPSrri VR128:$src1, VR128:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>, +def : Pat<(v4i32 (vector_shuffle VR128:$src1, (v4i32 VR128:$src2), + PSHUFD_binary_shuffle_mask:$sm)), + (SHUFPSrri VR128:$src1, VR128:$src2, PSHUFD_binary_shuffle_mask:$sm)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (SHUFPSrmi VR128:$src1, addr:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>, +def : Pat<(v4i32 (vector_shuffle VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm)), + (SHUFPSrmi VR128:$src1, addr:$src2, PSHUFD_binary_shuffle_mask:$sm)>, Requires<[HasSSE2]>; // Special binary v2i64 shuffle cases using SHUFPDrri. -def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), - (SHUFPDrri VR128:$src1, VR128:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>, +def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2, + SHUFP_shuffle_mask:$sm)), + (SHUFPDrri VR128:$src1, VR128:$src2, SHUFP_shuffle_mask:$sm)>, Requires<[HasSSE2]>; +// Special unary SHUFPDrri case. +def : Pat<(v2i64 (vector_shuffle VR128:$src1, (undef), + SHUFP_unary_shuffle_mask:$sm)), + (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>, + Requires<[HasSSE2]>; // vector_shuffle v1, , <0, 0, 1, 1, ...> let AddedComplexity = 15 in { -def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, +def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), + UNPCKL_v_undef_shuffle_mask:$sm)), + (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, Requires<[OptForSpeed, HasSSE2]>; -def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, +def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), + UNPCKL_v_undef_shuffle_mask:$sm)), + (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, Requires<[OptForSpeed, HasSSE2]>; } let AddedComplexity = 10 in { -def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))), +def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), + UNPCKL_v_undef_shuffle_mask)), (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; -def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))), +def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef), + UNPCKL_v_undef_shuffle_mask)), (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))), +def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef), + UNPCKL_v_undef_shuffle_mask)), (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))), +def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), + UNPCKL_v_undef_shuffle_mask)), (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; } // vector_shuffle v1, , <2, 2, 3, 3, ...> let AddedComplexity = 15 in { -def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, +def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), + UNPCKH_v_undef_shuffle_mask:$sm)), + (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, Requires<[OptForSpeed, HasSSE2]>; -def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, +def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), + UNPCKH_v_undef_shuffle_mask:$sm)), + (PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>, Requires<[OptForSpeed, HasSSE2]>; } let AddedComplexity = 10 in { -def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))), +def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), + UNPCKH_v_undef_shuffle_mask)), (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; -def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))), +def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef), + UNPCKH_v_undef_shuffle_mask)), (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))), +def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef), + UNPCKH_v_undef_shuffle_mask)), (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), +def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), + UNPCKH_v_undef_shuffle_mask)), (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; } let AddedComplexity = 20 in { // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS -def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)), +def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVHP_shuffle_mask)), (MOVLHPSrr VR128:$src1, VR128:$src2)>; // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS -def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)), +def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVHLPS_shuffle_mask)), (MOVHLPSrr VR128:$src1, VR128:$src2)>; // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS -def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))), +def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef), + MOVHLPS_v_undef_shuffle_mask)), (MOVHLPSrr VR128:$src1, VR128:$src1)>; -def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), +def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef), + MOVHLPS_v_undef_shuffle_mask)), (MOVHLPSrr VR128:$src1, VR128:$src1)>; } let AddedComplexity = 20 in { // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS // vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS -def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), +def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVLP_shuffle_mask)), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), +def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVLP_shuffle_mask)), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))), +def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVHP_shuffle_mask)), (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))), +def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVHP_shuffle_mask)), (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), +def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVLP_shuffle_mask)), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), +def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVLP_shuffle_mask)), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))), +def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVHP_shuffle_mask)), (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))), +def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2), + MOVHP_shuffle_mask)), (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; } // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS // (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS -def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), +def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2, + MOVLP_shuffle_mask)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), +def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2, + MOVLP_shuffle_mask)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), +def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2, + MOVHP_shuffle_mask)), addr:$src1), (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), +def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2, + MOVHP_shuffle_mask)), addr:$src1), (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), - addr:$src1), +def : Pat<(store (v4i32 (vector_shuffle + (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2, + MOVLP_shuffle_mask)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), +def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2, + MOVLP_shuffle_mask)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), - addr:$src1), +def : Pat<(store (v4i32 (vector_shuffle + (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2, + MOVHP_shuffle_mask)), addr:$src1), (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), +def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2, + MOVHP_shuffle_mask)), addr:$src1), (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; let AddedComplexity = 15 in { // Setting the lowest element in the vector. -def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), +def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVL_shuffle_mask)), (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), +def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2, + MOVL_shuffle_mask)), (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; // vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd) -def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), +def : Pat<(v4f32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVLP_shuffle_mask)), (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), +def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, + MOVLP_shuffle_mask)), (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; } // Set lowest element and zero upper elements. let AddedComplexity = 15 in -def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)), +def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src, + MOVL_shuffle_mask)), (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; diff --git a/test/CodeGen/X86/vec_clear.ll b/test/CodeGen/X86/vec_clear.ll index 514de953efe..c119a94f74f 100644 --- a/test/CodeGen/X86/vec_clear.ll +++ b/test/CodeGen/X86/vec_clear.ll @@ -1,7 +1,5 @@ -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f -; RUN: not grep and %t -; RUN: not grep psrldq %t -; RUN: grep xorps %t +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep and +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | grep psrldq define <4 x float> @test(<4 x float>* %v1) nounwind { %tmp = load <4 x float>* %v1 ; <<4 x float>> [#uses=1] diff --git a/test/CodeGen/X86/vec_shuffle-10.ll b/test/CodeGen/X86/vec_shuffle-10.ll index 297469d9202..45789b04bb6 100644 --- a/test/CodeGen/X86/vec_shuffle-10.ll +++ b/test/CodeGen/X86/vec_shuffle-10.ll @@ -1,7 +1,9 @@ -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f -; RUN: grep unpcklps %t | count 1 -; RUN: grep pshufd %t | count 1 -; RUN: not grep {sub.*esp} %t +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \ +; RUN: grep unpcklps | count 1 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \ +; RUN: grep unpckhps | count 1 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \ +; RUN: not grep {sub.*esp} define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) { %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2] diff --git a/test/CodeGen/X86/vec_shuffle-16.ll b/test/CodeGen/X86/vec_shuffle-16.ll index b3a5b769e67..79de903536d 100644 --- a/test/CodeGen/X86/vec_shuffle-16.ll +++ b/test/CodeGen/X86/vec_shuffle-16.ll @@ -1,10 +1,8 @@ -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin -o %t -f -; RUN: grep shufps %t | count 4 -; RUN: grep movaps %t | count 2 -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f -; RUN: grep pshufd %t | count 4 -; RUN: not grep shufps %t -; RUN: not grep mov %t +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 | grep shufps | count 4 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | grep mov | count 2 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd | count 4 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep shufps +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep mov define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind { %tmp1 = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer diff --git a/test/CodeGen/X86/vec_shuffle-30.ll b/test/CodeGen/X86/vec_shuffle-30.ll index 50a3df8f0b2..38f02fe238c 100644 --- a/test/CodeGen/X86/vec_shuffle-30.ll +++ b/test/CodeGen/X86/vec_shuffle-30.ll @@ -1,7 +1,8 @@ ; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -disable-mmx -o %t -f ; RUN: grep pshufhw %t | grep 161 | count 1 -; RUN: grep shufps %t | count 1 -; RUN: not grep pslldq %t +; RUN: grep pslldq %t | count 1 + + ; Test case when creating pshufhw, we incorrectly set the higher order bit ; for an undef, @@ -19,4 +20,4 @@ entry: %0 = shufflevector <4 x i32> %in, <4 x i32> , <4 x i32> < i32 undef, i32 5, i32 undef, i32 2> store <4 x i32> %0, <4 x i32>* %dest ret void -} +} \ No newline at end of file diff --git a/test/CodeGen/X86/vec_shuffle-31.ll b/test/CodeGen/X86/vec_shuffle-31.ll index efcd0300e35..0a9dc1fa7ba 100644 --- a/test/CodeGen/X86/vec_shuffle-31.ll +++ b/test/CodeGen/X86/vec_shuffle-31.ll @@ -1,6 +1,6 @@ ; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f ; RUN: grep pextrw %t | count 1 -; RUN: grep movlhps %t | count 1 +; RUN: grep punpcklqdq %t | count 1 ; RUN: grep pshufhw %t | count 1 ; RUN: grep pinsrw %t | count 1 ; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp index 52d8ca407c1..804d1df128a 100644 --- a/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/utils/TableGen/CodeGenDAGPatterns.cpp @@ -194,6 +194,10 @@ SDTypeConstraint::SDTypeConstraint(Record *R) { ConstraintType = SDTCisOpSmallerThanOp; x.SDTCisOpSmallerThanOp_Info.BigOperandNum = R->getValueAsInt("BigOperandNum"); + } else if (R->isSubClassOf("SDTCisIntVectorOfSameSize")) { + ConstraintType = SDTCisIntVectorOfSameSize; + x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum = + R->getValueAsInt("OtherOpNum"); } else if (R->isSubClassOf("SDTCisEltOfVec")) { ConstraintType = SDTCisEltOfVec; x.SDTCisEltOfVec_Info.OtherOperandNum = @@ -361,9 +365,23 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N, } return MadeChange; } + case SDTCisIntVectorOfSameSize: { + TreePatternNode *OtherOperand = + getOperandNum(x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum, + N, NumResults); + if (OtherOperand->hasTypeSet()) { + if (!isVector(OtherOperand->getTypeNum(0))) + TP.error(N->getOperator()->getName() + " VT operand must be a vector!"); + MVT IVT = OtherOperand->getTypeNum(0); + unsigned NumElements = IVT.getVectorNumElements(); + IVT = MVT::getIntVectorWithNumElements(NumElements); + return NodeToApply->UpdateNodeType(IVT.getSimpleVT(), TP); + } + return false; + } case SDTCisEltOfVec: { TreePatternNode *OtherOperand = - getOperandNum(x.SDTCisEltOfVec_Info.OtherOperandNum, + getOperandNum(x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum, N, NumResults); if (OtherOperand->hasTypeSet()) { if (!isVector(OtherOperand->getTypeNum(0))) @@ -907,6 +925,25 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { if (NI.getNumResults() == 0) MadeChange |= UpdateNodeType(MVT::isVoid, TP); + // If this is a vector_shuffle operation, apply types to the build_vector + // operation. The types of the integers don't matter, but this ensures they + // won't get checked. + if (getOperator()->getName() == "vector_shuffle" && + getChild(2)->getOperator()->getName() == "build_vector") { + TreePatternNode *BV = getChild(2); + const std::vector &LegalVTs + = CDP.getTargetInfo().getLegalValueTypes(); + MVT::SimpleValueType LegalIntVT = MVT::Other; + for (unsigned i = 0, e = LegalVTs.size(); i != e; ++i) + if (isInteger(LegalVTs[i]) && !isVector(LegalVTs[i])) { + LegalIntVT = LegalVTs[i]; + break; + } + assert(LegalIntVT != MVT::Other && "No legal integer VT?"); + + for (unsigned i = 0, e = BV->getNumChildren(); i != e; ++i) + MadeChange |= BV->getChild(i)->UpdateNodeType(LegalIntVT, TP); + } return MadeChange; } else if (getOperator()->isSubClassOf("Instruction")) { const DAGInstruction &Inst = CDP.getInstruction(getOperator()); @@ -2049,9 +2086,6 @@ void CodeGenDAGPatterns::ParsePatterns() { IterateInference |= Result->getTree(0)-> UpdateNodeType(Pattern->getTree(0)->getExtTypes(), *Result); } while (IterateInference); - - // Blah? - Result->getTree(0)->setTransformFn(Pattern->getTree(0)->getTransformFn()); // Verify that we inferred enough types that we can do something with the // pattern and result. If these fire the user has to add type casts. diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h index 9ce14dcc7b3..f1b0d37605b 100644 --- a/utils/TableGen/CodeGenDAGPatterns.h +++ b/utils/TableGen/CodeGenDAGPatterns.h @@ -62,7 +62,8 @@ struct SDTypeConstraint { unsigned OperandNo; // The operand # this constraint applies to. enum { SDTCisVT, SDTCisPtrTy, SDTCisInt, SDTCisFP, SDTCisSameAs, - SDTCisVTSmallerThanOp, SDTCisOpSmallerThanOp, SDTCisEltOfVec + SDTCisVTSmallerThanOp, SDTCisOpSmallerThanOp, SDTCisIntVectorOfSameSize, + SDTCisEltOfVec } ConstraintType; union { // The discriminated union. @@ -78,6 +79,9 @@ struct SDTypeConstraint { struct { unsigned BigOperandNum; } SDTCisOpSmallerThanOp_Info; + struct { + unsigned OtherOperandNum; + } SDTCisIntVectorOfSameSize_Info; struct { unsigned OtherOperandNum; } SDTCisEltOfVec_Info;