mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-04 10:30:01 +00:00
[x86] Teach the new vector shuffle lowering to also use pmovzx for v4i32
shuffles that are zext-ing.

Not a lot to see here; the undef lane variant is better handled with pshufd, but this improves the actual zext pattern.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218112 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
42b96889d1
commit
ec1f7b1c87
@ -7841,6 +7841,13 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
getV4X86ShuffleImm8ForMask(Mask, DAG));
|
||||
}
|
||||
|
||||
// Whenever we can lower this as a zext, that instruction is strictly faster
|
||||
// than any alternative.
|
||||
if (Subtarget->hasSSE41())
|
||||
if (SDValue ZExt =
|
||||
lowerVectorShuffleAsZeroExtend(DL, MVT::v4i32, V1, V2, Mask, DAG))
|
||||
return ZExt;
|
||||
|
||||
// Use dedicated unpack instructions for masks that match their pattern.
|
||||
if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
|
||||
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
|
||||
@ -8517,7 +8524,6 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
OrigMask, DAG))
|
||||
return ZExt;
|
||||
|
||||
|
||||
auto isV1 = [](int M) { return M >= 0 && M < 8; };
|
||||
auto isV2 = [](int M) { return M >= 8; };
|
||||
|
||||
|
@ -765,3 +765,47 @@ define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) {
|
||||
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
|
||||
ret <4 x i32> %shuffle
|
||||
}
|
||||
|
||||
; Shuffle of %a with mask <0, undef, 1, undef>: lanes 1 and 3 are undefined,
; so no zeroing is required. The checks below expect a single pshufd
; (xmm0[0,0,1,1]) under the shared ALL prefix rather than a zero-extend.
define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
|
||||
; ALL-LABEL: @shuffle_v4i32_0u1u
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,0,1,1]
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
|
||||
ret <4 x i32> %shuffle
|
||||
}
|
||||
|
||||
; Shuffle of %a with a zeroinitializer vector using mask <0, 5, 1, 7>: lanes 1
; and 3 are taken from the zero vector, i.e. the low two i32 elements of %a
; are interleaved with zeros. The checks below expect:
;   - under the SSE2, SSE3 and SSSE3 prefixes: xorps to materialize zero,
;     followed by two shufps instructions;
;   - under the SSE41 prefix: a single pmovzxdq;
;   - under the AVX1 prefix: a single vpmovzxdq.
define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
|
||||
; SSE2-LABEL: @shuffle_v4i32_0z1z
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: xorps %[[X:xmm[0-9]+]], %[[X]]
|
||||
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],[[X]][1,3]
|
||||
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: @shuffle_v4i32_0z1z
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: xorps %[[X:xmm[0-9]+]], %[[X]]
|
||||
; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],[[X]][1,3]
|
||||
; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: @shuffle_v4i32_0z1z
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: xorps %[[X:xmm[0-9]+]], %[[X]]
|
||||
; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],[[X]][1,3]
|
||||
; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: @shuffle_v4i32_0z1z
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovzxdq %xmm0, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: @shuffle_v4i32_0z1z
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpmovzxdq %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
|
||||
ret <4 x i32> %shuffle
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user