diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b372950d436..61accea84d1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7914,6 +7914,47 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
     return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask);
   }
 
+  // Check for a shuffle of a splat, and return just the splat. While DAG
+  // combining will do a similar transformation, this shows up with the
+  // internally created shuffles and so we handle it specially here as we won't
+  // have another chance to DAG-combine the generic shuffle instructions.
+  if (V2IsUndef) {
+    SDValue V = V1;
+
+    // Look through any bitcasts. These can't change the size, just the number
+    // of elements which we check later.
+    while (V.getOpcode() == ISD::BITCAST)
+      V = V->getOperand(0);
+
+    // A splat should always show up as a build vector node.
+    if (V.getOpcode() == ISD::BUILD_VECTOR) {
+      SDValue Base;
+      bool AllSame = true;
+      for (unsigned i = 0; i != V->getNumOperands(); ++i)
+        if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+          Base = V->getOperand(i);
+          break;
+        }
+      // Splat of <undef>, return <undef>
+      if (!Base)
+        return V1;
+      for (unsigned i = 0; i != V->getNumOperands(); ++i)
+        if (V->getOperand(i) != Base) {
+          AllSame = false;
+          break;
+        }
+      // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
+      // number of elements match or the value splatted is a zero constant.
+      if (AllSame) {
+        if (V.getValueType().getVectorNumElements() == (unsigned)NumElements)
+          return V1;
+        if (auto *C = dyn_cast<ConstantSDNode>(Base))
+          if (C->isNullValue())
+            return V1;
+      }
+    }
+  }
+
   // For integer vector shuffles, try to collapse them into a shuffle of fewer
   // lanes but wider integers. We cap this to not form integers larger than i64
   // but it might be interesting to form i128 integers to handle flipping the
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
index f392a6221c5..0c317c3dd22 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -172,3 +172,20 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
   ret <16 x i8> %shuffle
 }
+
+define <16 x i8> @zext_to_v8i16_shuffle(<16 x i8> %a) {
+; CHECK-SSE2-LABEL: @zext_to_v8i16_shuffle
+; CHECK-SSE2: pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @zext_to_v4i32_shuffle(<16 x i8> %a) {
+; CHECK-SSE2-LABEL: @zext_to_v4i32_shuffle
+; CHECK-SSE2: pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
+; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
+  ret <16 x i8> %shuffle
+}