diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9d507d349de..95247f32a05 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4898,8 +4898,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> static SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, - const TargetLowering &TLI, DebugLoc dl) { + SelectionDAG &DAG, DebugLoc dl) { EVT VT = SVOp->getValueType(0); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); @@ -5252,6 +5251,48 @@ static inline unsigned getUNPCKHOpcode(EVT VT) { return 0; } +static +SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + ShuffleVectorSDNode *SVOp = cast(Op); + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + + if (isZeroShuffle(SVOp)) + return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl); + + // Promote splats to v4f32. + if (SVOp->isSplat()) + return PromoteSplat(SVOp, DAG); + + // If the shuffle can be profitably rewritten as a narrower shuffle, then + // do it! + if (VT == MVT::v8i16 || VT == MVT::v16i8) { + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); + if (NewOp.getNode()) + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, NewOp); + } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) { + // FIXME: Figure out a cleaner way to do this. + // Try to make use of movq to zero out the top part. + if (ISD::isBuildVectorAllZeros(V2.getNode())) { + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); + if (NewOp.getNode()) { + if (isCommutedMOVL(cast(NewOp), true, false)) + return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0), + DAG, Subtarget, dl); + } + } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); + if (NewOp.getNode() && X86::isMOVLMask(cast(NewOp))) + return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1), + DAG, Subtarget, dl); + } + } + return SDValue(); +} + SDValue X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast(Op); @@ -5278,37 +5319,26 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (isMMX && SVOp->isSplat()) return Op; - if (isZeroShuffle(SVOp)) - return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl); + // Vector shuffle lowering takes 3 steps: + // + // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable + // narrowing and commutation of operands should be handled. + // 2) Matching of shuffles with known shuffle masks to x86 target specific + // shuffle nodes. + // 3) Rewriting of unmatched masks into new generic shuffle operations, + // so the shuffle can be broken into other shuffles and the legalizer can + // try the lowering again. + // + // The general ideia is that no vector_shuffle operation should be left to + // be matched during isel, all of them must be converted to a target specific + // node here. - // Promote splats to v4f32. - if (SVOp->isSplat()) - return PromoteSplat(SVOp, DAG); - - // If the shuffle can be profitably rewritten as a narrower shuffle, then - // do it! - if (VT == MVT::v8i16 || VT == MVT::v16i8) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); - if (NewOp.getNode()) - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, - LowerVECTOR_SHUFFLE(NewOp, DAG)); - } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) { - // FIXME: Figure out a cleaner way to do this. - // Try to make use of movq to zero out the top part. - if (ISD::isBuildVectorAllZeros(V2.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); - if (NewOp.getNode()) { - if (isCommutedMOVL(cast(NewOp), true, false)) - return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0), - DAG, Subtarget, dl); - } - } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl); - if (NewOp.getNode() && X86::isMOVLMask(cast(NewOp))) - return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1), - DAG, Subtarget, dl); - } - } + // Normalize the input vectors. Here splats, zeroed vectors, profitable + // narrowing and commutation of operands should be handled. The actual code + // doesn't include all of those, work in progress... + SDValue NewOp = NormalizeVectorShuffle(Op, DAG, Subtarget); + if (NewOp.getNode()) + return NewOp; // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and // unpckh_undef). Only use pshufd if speed is more important than size.