diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 70d26ee1561..5f72d5004fd 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -15122,7 +15122,7 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
          (VT == MVT::v8i32 && Subtarget->hasInt256()));
 
   // Get the high parts.
-  const int Mask[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1};
   SDValue Hi0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask);
   SDValue Hi1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask);
 
@@ -15138,10 +15138,18 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
                              DAG.getNode(Opcode, dl, MulVT, Hi0, Hi1));
 
   // Shuffle it back into the right order.
-  const int HighMask[] = {1, 5, 3, 7, 9, 13, 11, 15};
-  SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
-  const int LowMask[] = {0, 4, 2, 6, 8, 12, 10, 14};
-  SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+  SDValue Highs, Lows;
+  if (VT == MVT::v8i32) {
+    const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
+    Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
+    const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
+    Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+  } else {
+    const int HighMask[] = {1, 5, 3, 7};
+    Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
+    const int LowMask[] = {0, 4, 2, 6};
+    Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+  }
 
   // If we have a signed multiply but no PMULDQ fix up the high parts of a
   // unsigned multiply.
diff --git a/test/CodeGen/X86/vector-idiv.ll b/test/CodeGen/X86/vector-idiv.ll
index 4c30184a542..b6d43e985f0 100644
--- a/test/CodeGen/X86/vector-idiv.ll
+++ b/test/CodeGen/X86/vector-idiv.ll
@@ -8,7 +8,7 @@ define <4 x i32> @test1(<4 x i32> %a) {
 
 ; SSE41-LABEL: test1:
 ; SSE41: pmuludq
-; SSE41: pshufd $57
+; SSE41: pshufd $49
 ; SSE41: pmuludq
 ; SSE41: shufps $-35
 ; SSE41: psubd
@@ -18,7 +18,7 @@ define <4 x i32> @test1(<4 x i32> %a) {
 
 ; AVX-LABEL: test1:
 ; AVX: vpmuludq
-; AVX: vpshufd $57
+; AVX: vpshufd $49
 ; AVX: vpmuludq
 ; AVX: vshufps $-35
 ; AVX: vpsubd
@@ -32,11 +32,11 @@ define <8 x i32> @test2(<8 x i32> %a) {
   ret <8 x i32> %div
 
 ; AVX-LABEL: test2:
-; AVX: vpermd
+; AVX: vpbroadcastd
+; AVX: vpalignr $4
 ; AVX: vpmuludq
-; AVX: vshufps $-35
 ; AVX: vpmuludq
-; AVX: vshufps $-35
+; AVX: vpblendd $170
 ; AVX: vpsubd
 ; AVX: vpsrld $1
 ; AVX: vpadd
@@ -107,6 +107,12 @@ define <16 x i16> @test6(<16 x i16> %a) {
 define <16 x i8> @test7(<16 x i8> %a) {
   %div = sdiv <16 x i8> %a,
   ret <16 x i8> %div
+
+; FIXME: scalarized
+; SSE41-LABEL: test7:
+; SSE41: pext
+; AVX-LABEL: test7:
+; AVX: pext
 }
 
 define <4 x i32> @test8(<4 x i32> %a) {
   %div = sdiv <4 x i32> %a,
@@ -115,8 +121,8 @@ define <4 x i32> @test8(<4 x i32> %a) {
 ; SSE41-LABEL: test8:
 ; SSE41: pmuldq
-; SSE41: pshufd $57
-; SSE41-NOT: pshufd $57
+; SSE41: pshufd $49
+; SSE41-NOT: pshufd $49
 ; SSE41: pmuldq
 ; SSE41: shufps $-35
 ; SSE41: pshufd $-40
@@ -130,8 +136,8 @@ define <4 x i32> @test8(<4 x i32> %a) {
 ; SSE: pand
 ; SSE: paddd
 ; SSE: pmuludq
-; SSE: pshufd $57
-; SSE-NOT: pshufd $57
+; SSE: pshufd $49
+; SSE-NOT: pshufd $49
 ; SSE: pmuludq
 ; SSE: shufps $-35
 ; SSE: pshufd $-40
@@ -143,8 +149,8 @@ define <4 x i32> @test8(<4 x i32> %a) {
 ; AVX-LABEL: test8:
 ; AVX: vpmuldq
-; AVX: vpshufd $57
-; AVX-NOT: vpshufd $57
+; AVX: vpshufd $49
+; AVX-NOT: vpshufd $49
 ; AVX: vpmuldq
 ; AVX: vshufps $-35
 ; AVX: vpshufd $-40
@@ -159,12 +165,11 @@ define <8 x i32> @test9(<8 x i32> %a) {
   ret <8 x i32> %div
 
 ; AVX-LABEL: test9:
+; AVX: vpalignr $4
 ; AVX: vpbroadcastd
 ; AVX: vpmuldq
-; AVX: vshufps $-35
 ; AVX: vpmuldq
-; AVX: vshufps $-35
-; AVX: vpshufd $-40
+; AVX: vpblendd $170
 ; AVX: vpadd
 ; AVX: vpsrld $31
 ; AVX: vpsrad $2
@@ -177,10 +182,10 @@ define <8 x i32> @test10(<8 x i32> %a) {
 
 ; AVX-LABEL: test10:
 ; AVX: vpbroadcastd
+; AVX: vpalignr $4
 ; AVX: vpmuludq
-; AVX: vshufps $-35
 ; AVX: vpmuludq
-; AVX: vshufps $-35
+; AVX: vpblendd $170
 ; AVX: vpsubd
 ; AVX: vpsrld $1
 ; AVX: vpadd
@@ -193,12 +198,11 @@ define <8 x i32> @test11(<8 x i32> %a) {
   ret <8 x i32> %rem
 
 ; AVX-LABEL: test11:
+; AVX: vpalignr $4
 ; AVX: vpbroadcastd
 ; AVX: vpmuldq
-; AVX: vshufps $-35
 ; AVX: vpmuldq
-; AVX: vshufps $-35
+; AVX: vpblendd $170
 ; AVX: vpadd
 ; AVX: vpsrld $31
 ; AVX: vpsrad $2
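
Why the masks changed, sketched below: PMULUDQ and PMULDQ read only the even 32-bit elements of each operand and produce 64-bit products, so after the bitcast back to i32 the low half of product k sits at element 2k and the high half at element 2k+1. The odd lanes of the "get the high parts" shuffle are therefore dead, and marking them undef (-1) lets the v8i32 case avoid a cross-lane vpermd. The recombination masks index the concatenation (Mul1, Mul2), so for v8i32 Mul2 begins at element 8, not 4; the old {1, 5, 3, 7, 9, 13, 11, 15} mask interleaved the wrong elements. The standalone program below is a sketch of those semantics on plain arrays, not LLVM API; the helper names pmuludq and shuffle2 are invented. It checks that the corrected v8i32 masks reassemble each product's halves in element order.

// mul_lohi_masks.cpp: model the shuffle/multiply semantics the patch relies on.
#include <cstdint>
#include <cstdio>
#include <vector>

// PMULUDQ viewed on N i32 lanes: multiply the even elements and store each
// 64-bit product across a lane pair (low half at 2k, high half at 2k+1).
// Odd input lanes are never read, which is why the patch can leave them undef.
static std::vector<uint32_t> pmuludq(const std::vector<uint32_t> &a,
                                     const std::vector<uint32_t> &b) {
  std::vector<uint32_t> r(a.size());
  for (size_t i = 0; i < a.size(); i += 2) {
    uint64_t p = uint64_t(a[i]) * b[i];
    r[i] = uint32_t(p);           // low 32 bits of the product
    r[i + 1] = uint32_t(p >> 32); // high 32 bits of the product
  }
  return r;
}

// getVectorShuffle-style mask over the concatenation (v1, v2): entries
// 0..N-1 pick from v1, entries N..2N-1 pick from v2.
static std::vector<uint32_t> shuffle2(const std::vector<uint32_t> &v1,
                                      const std::vector<uint32_t> &v2,
                                      const std::vector<int> &mask) {
  std::vector<uint32_t> r;
  for (int m : mask)
    r.push_back(m < int(v1.size()) ? v1[m] : v2[m - v1.size()]);
  return r;
}

int main() {
  // Eight i32 lanes, as in the v8i32 case. op1[0] is large so that the
  // first product has a nonzero high half.
  std::vector<uint32_t> op0 = {10, 11, 12, 13, 14, 15, 16, 17};
  std::vector<uint32_t> op1 = {0x90000001u, 21, 22, 23, 24, 25, 26, 27};

  // {1, -1, 3, -1, 5, -1, 7, -1}: odd elements moved into even slots,
  // the remaining slots are dead (modeled as zero here).
  std::vector<uint32_t> hi0(8), hi1(8);
  for (int i = 0; i < 8; i += 2) {
    hi0[i] = op0[i + 1];
    hi1[i] = op1[i + 1];
  }

  std::vector<uint32_t> mul1 = pmuludq(op0, op1); // products of even elements
  std::vector<uint32_t> mul2 = pmuludq(hi0, hi1); // products of odd elements

  // The corrected v8i32 masks: Mul2's elements live at indices 8..15.
  std::vector<uint32_t> lows =
      shuffle2(mul1, mul2, {0, 8, 2, 10, 4, 12, 6, 14});
  std::vector<uint32_t> highs =
      shuffle2(mul1, mul2, {1, 9, 3, 11, 5, 13, 7, 15});

  // Element i must hold the low/high halves of op0[i] * op1[i].
  for (int i = 0; i < 8; ++i) {
    uint64_t p = uint64_t(op0[i]) * op1[i];
    printf("%d: lo %u (want %u)  hi %u (want %u)\n", i, (unsigned)lows[i],
           (unsigned)uint32_t(p), (unsigned)highs[i], (unsigned)(p >> 32));
  }
}

With the old v8i32 HighMask, result element 1 would have been Mul1[5], the high half of op0[4]*op1[4], instead of Mul2[1], the high half of op0[1]*op1[1].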