mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-25 00:33:15 +00:00
[x86] Teach the new vector shuffle lowering of v4f64 to prefer a direct
VBLENDPD over using VSHUFPD. While the 256-bit variant of VBLENDPD slows down to the same speed as VSHUFPD on Sandy Bridge CPUs, it has twice the throughput (half the reciprocal throughput) on Ivy Bridge CPUs, much like it does everywhere for 128-bit vectors. There isn't a downside, so just eagerly use this instruction when it suffices.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218208 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0dd52092d0
commit
29720a4bad
@ -7237,6 +7237,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
|
||||
switch (VT.SimpleTy) {
|
||||
case MVT::v2f64:
|
||||
case MVT::v4f32:
|
||||
case MVT::v4f64:
|
||||
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
|
||||
DAG.getConstant(BlendMask, MVT::i8));
|
||||
|
||||
@ -9229,6 +9230,10 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
if (isShuffleEquivalent(Mask, 5, 1, 7, 3))
|
||||
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1);
|
||||
|
||||
if (SDValue Blend =
|
||||
lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, DAG))
|
||||
return Blend;
|
||||
|
||||
// Check if the blend happens to exactly fit that of SHUFPD.
|
||||
if (Mask[0] < 4 && (Mask[1] == -1 || Mask[1] >= 4) &&
|
||||
Mask[2] < 4 && (Mask[3] == -1 || Mask[3] >= 4)) {
|
||||
|
@ -359,7 +359,7 @@ define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
|
||||
define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
|
||||
; ALL-LABEL: @shuffle_v4f64_0527
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vshufpd {{.*}} # ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
|
||||
; ALL-NEXT: vblendpd {{.*}} # ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
ret <4 x double> %shuffle
|
||||
@ -368,7 +368,7 @@ define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
|
||||
define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
|
||||
; ALL-LABEL: @shuffle_v4f64_4163
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vshufpd {{.*}} # ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
|
||||
; ALL-NEXT: vblendpd {{.*}} # ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
|
||||
ret <4 x double> %shuffle
|
||||
|
Loading…
x
Reference in New Issue
Block a user