X86: Custom lower <2 x i64> eq and ne when SSE41 is not available.

pcmpeqd, pshufd, pshufd, pand is cheaper than unpack + cmpq, sbbq, cmpq, sbbq + pack.
Small speedup on loop-vectorized viterbi (-march=core2).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171063 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Benjamin Kramer 2012-12-25 12:54:19 +00:00
parent 4684858624
commit 382ed78d3f
2 changed files with 50 additions and 2 deletions

View File

@ -9171,8 +9171,30 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::v2i64) {
if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
return SDValue();
if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41())
return SDValue();
if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
// If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
// pcmpeqd + 2 shuffles + pand.
assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
// First cast everything to the right type,
Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
// Do the compare.
SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
// Make sure the lower and upper halves are both all-ones.
const int Mask1[] = { 0, 0, 2, 2 };
SDValue S1 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask1);
const int Mask2[] = { 1, 1, 3, 3 };
SDValue S2 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask2);
Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, S1, S2);
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
return DAG.getNode(ISD::BITCAST, dl, VT, Result);
}
}
// Since SSE has no unsigned integer comparisons, we need to flip the sign

View File

@ -41,3 +41,29 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
%D = sext <4 x i1> %C to <4 x i32>
ret <4 x i32> %D
}
define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK: test5:
; CHECK: pcmpeqd
; CHECK: pshufd $-11
; CHECK: pshufd $-96
; CHECK: pand
; CHECK: ret
%C = icmp eq <2 x i64> %A, %B
%D = sext <2 x i1> %C to <2 x i64>
ret <2 x i64> %D
}
define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK: test6:
; CHECK: pcmpeqd
; CHECK: pshufd $-11
; CHECK: pshufd $-96
; CHECK: pand
; CHECK: pcmpeqd
; CHECK: pxor
; CHECK: ret
%C = icmp ne <2 x i64> %A, %B
%D = sext <2 x i1> %C to <2 x i64>
ret <2 x i64> %D
}