diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5f7f9150977..a173712b5bc 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9171,8 +9171,30 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { if (VT == MVT::v2i64) { if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) return SDValue(); - if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) - return SDValue(); + if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) { + // pcmpeqq requires SSE4.1. Without it, synthesize the 64-bit equality + // compare from SSE2 operations: pcmpeqd, two shuffles and a pand. + assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!"); + + // Bitcast both operands to v4i32 so each i64 lane becomes two i32 lanes. + Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); + Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); + + // Compare the 32-bit lanes for equality. + SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1); + + // An i64 lane matches only when both of its i32 halves are all-ones. 
+ const int Mask1[] = { 0, 0, 2, 2 }; + SDValue S1 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask1); + const int Mask2[] = { 1, 1, 3, 3 }; + SDValue S2 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask2); + Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, S1, S2); + + if (Invert) + Result = DAG.getNOT(dl, Result, MVT::v4i32); + + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } } // Since SSE has no unsigned integer comparisons, we need to flip the sign diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll index 367dd27f307..cf86c737c73 100644 --- a/test/CodeGen/X86/vec_compare.ll +++ b/test/CodeGen/X86/vec_compare.ll @@ -41,3 +41,29 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind { %D = sext <4 x i1> %C to <4 x i32> ret <4 x i32> %D } + +define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind { +; CHECK: test5: +; CHECK: pcmpeqd +; CHECK: pshufd $-11 +; CHECK: pshufd $-96 +; CHECK: pand +; CHECK: ret + %C = icmp eq <2 x i64> %A, %B + %D = sext <2 x i1> %C to <2 x i64> + ret <2 x i64> %D +} + +define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind { +; CHECK: test6: +; CHECK: pcmpeqd +; CHECK: pshufd $-11 +; CHECK: pshufd $-96 +; CHECK: pand +; CHECK: pcmpeqd +; CHECK: pxor +; CHECK: ret + %C = icmp ne <2 x i64> %A, %B + %D = sext <2 x i1> %C to <2 x i64> + ret <2 x i64> %D +}