diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e8cbd8af7a8..603ccb090ed 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10838,16 +10838,30 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // It may still be beneficial to combine the two shuffles if the // resulting shuffle is legal. - if (TLI.isTypeLegal(VT) && TLI.isShuffleMaskLegal(Mask, VT)) { - if (!CommuteOperands) - // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3). - // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3) - return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1, - &Mask[0]); - - // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3) - return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1), - &Mask[0]); + if (TLI.isTypeLegal(VT)) { + if (!CommuteOperands) { + if (TLI.isShuffleMaskLegal(Mask, VT)) + // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3). + // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3) + return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1, + &Mask[0]); + } else { + // Compute the commuted shuffle mask. + for (unsigned i = 0; i != NumElts; ++i) { + int idx = Mask[i]; + if (idx < 0) + continue; + else if (idx < (int)NumElts) + Mask[i] = idx + NumElts; + else + Mask[i] = idx - NumElts; + } + + if (TLI.isShuffleMaskLegal(Mask, VT)) + // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(y, undef, M3) + return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(1), N1, + &Mask[0]); + } } } diff --git a/test/CodeGen/X86/combine-vec-shuffle-2.ll b/test/CodeGen/X86/combine-vec-shuffle-2.ll index 877d38260d6..872384ca0a1 100644 --- a/test/CodeGen/X86/combine-vec-shuffle-2.ll +++ b/test/CodeGen/X86/combine-vec-shuffle-2.ll @@ -204,8 +204,8 @@ define <4 x i32> @test18(<4 x i32> %A, <4 x i32> %B) { ret <4 x i32> %2 } ; CHECK-LABEL: test18 -; CHECK: blendps $11 -; CHECK-NEXT: pshufd $-59 +; CHECK-NOT: blendps +; CHECK: pshufd {{.*}} # xmm0 = xmm1[1,1,0,3] ; CHECK-NEXT: ret define <4 x i32> @test19(<4 x i32> %A, <4 x i32> %B) { @@ -240,6 +240,8 @@ define <4 x i32> @test21(<4 x i32> %A, <4 x i32> %B) { ; CHECK-NEXT: pshufd $-60 ; CHECK-NEXT: ret +; Test that we correctly combine shuffles according to rule +; shuffle(shuffle(x, y), undef) -> shuffle(y, undef) define <4 x i32> @test22(<4 x i32> %A, <4 x i32> %B) { %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> @@ -247,7 +249,69 @@ define <4 x i32> @test22(<4 x i32> %A, <4 x i32> %B) { ret <4 x i32> %2 } ; CHECK-LABEL: test22 -; CHECK: blendps $11 -; CHECK-NEXT: pshufd $-43 +; CHECK-NOT: blendps +; CHECK: pshufd {{.*}} # xmm0 = xmm1[1,1,1,3] +; CHECK-NEXT: ret + +define <4 x i32> @test23(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test23 +; CHECK-NOT: blendps +; CHECK: pshufd {{.*}} # xmm0 = xmm1[0,1,0,3] +; CHECK-NEXT: ret + +define <4 x i32> @test24(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test24 +; CHECK-NOT: blendps +; CHECK: pshufd {{.*}} # xmm0 = xmm1[0,3,2,0] +; CHECK-NEXT: ret + +define <4 x i32> @test25(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test25 +; CHECK-NOT: shufps +; CHECK: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1] +; CHECK-NEXT: ret + +define <4 x i32> @test26(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test26 +; CHECK-NOT: shufps +; CHECK: movhlps {{.*}} # xmm0 = xmm0[1,1] +; CHECK-NEXT: ret + +define <4 x i32> @test27(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test27 +; CHECK-NOT: shufps +; CHECK-NOT: movhlps +; CHECK: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1] +; CHECK-NEXT: ret + +define <4 x i32> @test28(<4 x i32> %A, <4 x i32> %B) { + %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test28 +; CHECK-NOT: shufps +; CHECK-NOT: movhlps +; CHECK: pshufd {{.*}} # xmm0 = xmm0[0,1,1,0] ; CHECK-NEXT: ret