diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9d91b3e6443..28031bb43b2 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10780,11 +10780,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // Try to fold according to rules: - // shuffle(shuffle A, B, M0), B, M1) -> shuffle(A, B, M2) - // shuffle(shuffle A, B, M0), A, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2) // Don't try to fold shuffles with illegal type. if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - TLI.isTypeLegal(VT)) { + N1.getOpcode() != ISD::UNDEF && TLI.isTypeLegal(VT)) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); // The incoming shuffle must be of the same type as the result of the @@ -10795,7 +10797,8 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDValue SV0 = OtherSV->getOperand(0); SDValue SV1 = OtherSV->getOperand(1); bool HasSameOp0 = N1 == SV0; - if (!HasSameOp0 && N1 != SV1) + bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; + if (!HasSameOp0 && !IsSV1Undef && N1 != SV1) // Early exit. return SDValue(); @@ -10810,17 +10813,24 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { continue; } - if (Idx < (int)NumElts) + if (Idx < (int)NumElts) { Idx = OtherSV->getMaskElt(Idx); - else + if (IsSV1Undef && Idx >= (int) NumElts) + Idx = -1; // Propagate Undef. + } else Idx = HasSameOp0 ? Idx - NumElts : Idx; Mask.push_back(Idx); } // Avoid introducing shuffles with illegal mask. 
- if (TLI.isShuffleMaskLegal(Mask, VT)) + if (TLI.isShuffleMaskLegal(Mask, VT)) { + if (IsSV1Undef) + // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2) + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]); return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); + } } return SDValue(); diff --git a/test/CodeGen/X86/combine-vec-shuffle-4.ll b/test/CodeGen/X86/combine-vec-shuffle-4.ll new file mode 100644 index 00000000000..45c624a61d4 --- /dev/null +++ b/test/CodeGen/X86/combine-vec-shuffle-4.ll @@ -0,0 +1,122 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s + +; Verify that we fold shuffles according to rule: +; (shuffle(shuffle A, Undef, M0), B, M1) -> (shuffle A, B, M2) + +define <4 x float> @test1(<4 x float> %a, <4 x float> %b) { + %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> + ret <4 x float> %2 +} +; CHECK-LABEL: test1 +; Mask: [4,5,2,3] +; CHECK: movsd +; CHECK: ret + +define <4 x float> @test2(<4 x float> %a, <4 x float> %b) { + %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> + ret <4 x float> %2 +} +; CHECK-LABEL: test2 +; Mask: [0,1,4,5] +; CHECK: movlhps +; CHECK: ret + +define <4 x float> @test3(<4 x float> %a, <4 x float> %b) { + %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> + ret <4 x float> %2 +} +; CHECK-LABEL: test3 +; Mask: [0,1,4,u] +; CHECK: movlhps +; CHECK: ret + +define <4 x float> @test4(<4 x float> %a, <4 x float> %b) { + %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> + ret <4 x float> %2 +} +; FIXME: this should be lowered as a single movhlps. 
However, the backend +; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we +; end up with the sub-optimal sequence 'movhlps, palignr'. +; CHECK-LABEL: test4 +; Mask: [6,7,2,3] +; CHECK: movhlps +; CHECK: palignr $8 +; CHECK: ret + +define <4 x float> @test5(<4 x float> %a, <4 x float> %b) { + %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> + ret <4 x float> %2 +} +; CHECK-LABEL: test5 +; Mask: [0,1,6,7] +; CHECK: blendps $12 +; CHECK: ret + +; Verify that we fold shuffles according to rule: +; (shuffle(shuffle A, Undef, M0), A, M1) -> (shuffle A, Undef, M2) + +define <4 x float> @test6(<4 x float> %a) { + %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> + ret <4 x float> %2 +} +; CHECK-LABEL: test6 +; Mask: [0,1,2,3] +; CHECK-NOT: pshufd +; CHECK-NOT: shufps +; CHECK-NOT: movlhps +; CHECK: ret + +define <4 x float> @test7(<4 x float> %a) { + %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> + ret <4 x float> %2 +} +; CHECK-LABEL: test7 +; Mask: [0,1,0,1] +; CHECK-NOT: pshufd +; CHECK-NOT: shufps +; CHECK: movlhps +; CHECK-NEXT: ret + +define <4 x float> @test8(<4 x float> %a) { + %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> + ret <4 x float> %2 +} +; CHECK-LABEL: test8 +; Mask: [0,1,0,u] +; CHECK-NOT: pshufd +; CHECK-NOT: shufps +; CHECK: movlhps +; CHECK-NEXT: ret + +define <4 x float> @test9(<4 x float> %a) { + %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> + ret <4 x float> %2 +} +; CHECK-LABEL: test9 +; Mask: [2,3,2,3] +; CHECK-NOT: movlhps +; CHECK-NOT: palignr +; CHECK: movhlps +; CHECK-NEXT: ret + +define <4 x float> @test10(<4 x float> %a) { + %1 = 
shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> + %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> + ret <4 x float> %2 +} +; CHECK-LABEL: test10 +; Mask: [0,1,2,3] +; CHECK-NOT: pshufd +; CHECK-NOT: shufps +; CHECK-NOT: movlhps +; CHECK: ret +