diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 394a6221056..947cd01308b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8214,6 +8214,13 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
 
+  // Whenever we can lower this as a zext, that instruction is strictly faster
+  // than any alternative. It also allows us to fold memory operands into the
+  // shuffle in many cases.
+  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2,
+                                                         Mask, Subtarget, DAG))
+    return ZExt;
+
   int NumV2Elements =
       std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
 
@@ -8239,12 +8246,6 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                        getV4X86ShuffleImm8ForMask(Mask, DAG));
   }
 
-  // Whenever we can lower this as a zext, that instruction is strictly faster
-  // than any alternative.
-  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2,
-                                                         Mask, Subtarget, DAG))
-    return ZExt;
-
   // Use dedicated unpack instructions for masks that match their pattern.
   if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
     return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 080cd439476..948f1d2e47a 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -937,14 +937,29 @@ define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) {
 }
 
 define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
-; SSE-LABEL: shuffle_v4i32_0u1u:
-; SSE:       # BB#0:
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; SSE-NEXT:    retq
+; SSE2-LABEL: shuffle_v4i32_0u1u:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_0u1u:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_0u1u:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_0u1u:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxdq %xmm0, %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v4i32_0u1u:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX-NEXT:    vpmovzxdq %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
   ret <4 x i32> %shuffle
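
Note (illustrative, not part of the patch): the new comment claims the zext lowering also lets memory operands fold into the shuffle. A minimal sketch of that case in the style of vector-shuffle-128-v4.ll follows; the function name @shuffle_v4i32_0u1u_mem and the expected assembly are assumptions for illustration, not generated FileCheck output.

; Hypothetical example: with the zext lowering tried first, an SSE4.1 target
; could fold the load into the extend and emit something like
;   pmovzxdq (%rdi), %xmm0
; instead of a separate load followed by a shuffle.
define <4 x i32> @shuffle_v4i32_0u1u_mem(<4 x i32>* %p) {
  %a = load <4 x i32>* %p
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
  ret <4 x i32> %shuffle
}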