mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-02 07:32:52 +00:00
X86: Custom lower <2 x i64> eq and ne when SSE41 is not available.
pcmpeqd, pshufd, pshufd, pand is cheaper than unpack + cmpq, sbbq, cmpq, sbbq + pack. Small speedup on loop-vectorized viterbi (-march=core2).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171063 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4684858624
commit
382ed78d3f
@ -9171,8 +9171,30 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (VT == MVT::v2i64) {
|
||||
if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
|
||||
return SDValue();
|
||||
- if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41())
|
||||
- return SDValue();
|
||||
if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
|
||||
// If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
|
||||
// pcmpeqd + 2 shuffles + pand.
|
||||
assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
|
||||
|
||||
// First cast everything to the right type.
|
||||
Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
|
||||
Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
|
||||
|
||||
// Do the compare.
|
||||
SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
|
||||
|
||||
// Make sure the lower and upper halves are both all-ones.
|
||||
const int Mask1[] = { 0, 0, 2, 2 };
|
||||
SDValue S1 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask1);
|
||||
const int Mask2[] = { 1, 1, 3, 3 };
|
||||
SDValue S2 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask2);
|
||||
Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, S1, S2);
|
||||
|
||||
if (Invert)
|
||||
Result = DAG.getNOT(dl, Result, MVT::v4i32);
|
||||
|
||||
return DAG.getNode(ISD::BITCAST, dl, VT, Result);
|
||||
}
|
||||
}
|
||||
|
||||
// Since SSE has no unsigned integer comparisons, we need to flip the sign
|
||||
|
@ -41,3 +41,29 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
|
||||
%D = sext <4 x i1> %C to <4 x i32>
|
||||
ret <4 x i32> %D
|
||||
}
|
||||
|
||||
define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind {
|
||||
; CHECK: test5:
|
||||
; CHECK: pcmpeqd
|
||||
; CHECK: pshufd $-11
|
||||
; CHECK: pshufd $-96
|
||||
; CHECK: pand
|
||||
; CHECK: ret
|
||||
%C = icmp eq <2 x i64> %A, %B
|
||||
%D = sext <2 x i1> %C to <2 x i64>
|
||||
ret <2 x i64> %D
|
||||
}
|
||||
|
||||
define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind {
|
||||
; CHECK: test6:
|
||||
; CHECK: pcmpeqd
|
||||
; CHECK: pshufd $-11
|
||||
; CHECK: pshufd $-96
|
||||
; CHECK: pand
|
||||
; CHECK: pcmpeqd
|
||||
; CHECK: pxor
|
||||
; CHECK: ret
|
||||
%C = icmp ne <2 x i64> %A, %B
|
||||
%D = sext <2 x i1> %C to <2 x i64>
|
||||
ret <2 x i64> %D
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user