diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 747aa43df26..857109119fb 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11770,8 +11770,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) // Don't try to fold shuffles with illegal type. - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - TLI.isTypeLegal(VT)) { + // Only fold if this shuffle is the only user of the other shuffle. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) && + Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); // The incoming shuffle must be of the same type as the result of the diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll index b870c366181..3b1e1892c07 100644 --- a/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/test/CodeGen/X86/vector-shuffle-combining.ll @@ -2537,3 +2537,28 @@ define <4 x float> @combine_insertps4(<4 x float> %a, <4 x float> %b) { %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32> ret <4 x float> %d } + +define <4 x float> @PR22377(<4 x float> %a, <4 x float> %b) #0 { +; SSE-LABEL: PR22377: +; SSE: # BB#0: # %entry +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3,1,3] +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,0,2] +; SSE-NEXT: addps %xmm0, %xmm1 +; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: retq +; +; AVX-LABEL: PR22377: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,3,1,3] +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2] +; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX-NEXT: retq +entry: + %s1 = shufflevector <4 x
float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 1, i32 3> + %s2 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2> + %r2 = fadd <4 x float> %s1, %s2 + %s3 = shufflevector <4 x float> %s2, <4 x float> %r2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + ret <4 x float> %s3 +}