mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-08 03:30:22 +00:00
[X86] Always prefer to lower a VECTOR_SHUFFLE into a BLENDI instead of SHUFP (or VPERM2X128).
This patch teaches method 'LowerVECTOR_SHUFFLE' to give higher precedence to the check for 'isBlendMask'. The idea is that, when possible, we should first check whether a shuffle performs a blend and, if so, try to lower it into a BLENDI instead of selecting a SHUFP or (worse) a VPERM2X128.

In general:
- AVX VBLENDPS/D always have better latency and throughput than VPERM2F128;
- BLENDPS/D instructions tend to have better 'reciprocal throughput' than the equivalent SHUFPS/D;
- Both BLENDPS/D and SHUFPS/D are often decoded into the same number of m-ops; however, an m-op obtained from a BLENDPS/D can be scheduled to more than one execution port.

This patch:
- Moves the check for 'isBlendMask' immediately before the check for 'isSHUFPMask' within method 'LowerVECTOR_SHUFFLE';
- Updates existing tests for sse/avx shuffle/blend instructions to verify that we select (v)blendps/d when possible (instead of (v)shufps/d or vperm2f128).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211720 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
75f1d664b4
commit
cae1ea691d
@ -8337,6 +8337,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
getShufflePSHUFLWImmediate(SVOp),
|
||||
DAG);
|
||||
|
||||
unsigned MaskValue;
|
||||
if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(),
|
||||
&MaskValue))
|
||||
return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG);
|
||||
|
||||
if (isSHUFPMask(M, VT))
|
||||
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
|
||||
getShuffleSHUFImmediate(SVOp), DAG);
|
||||
@ -8374,11 +8379,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
|
||||
V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
|
||||
|
||||
unsigned MaskValue;
|
||||
if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(),
|
||||
&MaskValue))
|
||||
return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG);
|
||||
|
||||
if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT))
|
||||
return getINSERTPS(SVOp, dl, DAG);
|
||||
|
||||
|
@ -5374,8 +5374,8 @@ let Predicates = [HasAVX] in {
|
||||
// - the 1st and 3rd element from the first input vector (the 'fsub' node);
|
||||
// - the 2nd and 4th element from the second input vector (the 'fadd' node).
|
||||
|
||||
def : Pat<(v4f64 (X86Shufp (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
|
||||
(v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))),
|
||||
def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
|
||||
(v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
|
||||
(VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
|
||||
def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
|
||||
(v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
|
||||
|
@ -110,7 +110,7 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
|
||||
|
||||
;CHECK-LABEL: vsel_double4:
|
||||
;CHECK-NOT: vinsertf128
|
||||
;CHECK: vshufpd $10
|
||||
;CHECK: vblendpd $10
|
||||
;CHECK-NEXT: ret
|
||||
define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
|
||||
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
|
||||
|
@ -25,7 +25,7 @@ define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
|
||||
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
|
||||
ret <4 x i64> %c
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: vperm2f128
|
||||
; CHECK: vblendpd
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
|
@ -9,7 +9,7 @@ entry:
|
||||
}
|
||||
|
||||
; CHECK: _B
|
||||
; CHECK: vperm2f128 $48
|
||||
; CHECK: vblendps $240
|
||||
define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
|
||||
|
@ -32,14 +32,14 @@ entry:
|
||||
ret <8 x i32> %shuffle
|
||||
}
|
||||
|
||||
; CHECK: vshufpd $10, %ymm
|
||||
; CHECK: vblendpd $10, %ymm
|
||||
define <4 x double> @B(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
; CHECK: vshufpd $10, (%{{.*}}), %ymm
|
||||
; CHECK: vblendpd $10, (%{{.*}}), %ymm
|
||||
define <4 x double> @B2(<4 x double>* %a, <4 x double>* %b) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%a2 = load <4 x double>* %a
|
||||
@ -48,14 +48,14 @@ entry:
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
; CHECK: vshufpd $10, %ymm
|
||||
; CHECK: vblendpd $10, %ymm
|
||||
define <4 x i64> @B3(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
ret <4 x i64> %shuffle
|
||||
}
|
||||
|
||||
; CHECK: vshufpd $10, (%{{.*}}), %ymm
|
||||
; CHECK: vblendpd $10, (%{{.*}}), %ymm
|
||||
define <4 x i64> @B4(<4 x i64>* %a, <4 x i64>* %b) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%a2 = load <4 x i64>* %a
|
||||
@ -71,7 +71,7 @@ entry:
|
||||
ret <8 x float> %shuffle
|
||||
}
|
||||
|
||||
; CHECK: vshufpd $2, %ymm
|
||||
; CHECK: vblendpd $2, %ymm
|
||||
define <4 x double> @D(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 undef>
|
||||
|
@ -74,7 +74,7 @@ define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
|
||||
}
|
||||
; CHECK-LABEL: test6
|
||||
; CHECK-NOT: xorps
|
||||
; CHECK: shufps
|
||||
; CHECK: blendps $12
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
|
||||
@ -86,7 +86,7 @@ define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
|
||||
}
|
||||
; CHECK-LABEL: test7
|
||||
; CHECK-NOT: xorps
|
||||
; CHECK: shufps
|
||||
; CHECK: blendps $12
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user