mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-12 07:37:34 +00:00
[x86] Teach the new AVX v4f64 shuffle lowering to use UNPCK instructions
where applicable for blending.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215737 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f92cdd62c0
commit
92ee945e2e
@ -7055,6 +7055,35 @@ static bool isSingleInputShuffleMask(ArrayRef<int> Mask) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Implementation of the \c isShuffleEquivalent variadic functor.
|
||||
///
|
||||
/// See its documentation for details.
|
||||
static bool isShuffleEquivalentImpl(ArrayRef<int> Mask,
|
||||
ArrayRef<const int *> Args) {
|
||||
if (Mask.size() != Args.size())
|
||||
return false;
|
||||
for (int i = 0, e = Mask.size(); i < e; ++i) {
|
||||
assert(*Args[i] >= 0 && "Arguments must be positive integers!");
|
||||
assert(*Args[i] < (int)Args.size() * 2 &&
|
||||
"Argument outside the range of possible shuffle inputs!");
|
||||
if (Mask[i] != -1 && Mask[i] != *Args[i])
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
|
||||
/// arguments.
|
||||
///
|
||||
/// This is a fast way to test a shuffle mask against a fixed pattern:
|
||||
///
|
||||
/// if (isShuffleEquivalent(Mask, 3, 2, 1, 0)) { ... }
|
||||
///
|
||||
/// It returns true if the mask is exactly as wide as the argument list, and
|
||||
/// each element of the mask is either -1 (signifying undef) or the value given
|
||||
/// in the argument.
|
||||
static const VariadicFunction1<
|
||||
bool, ArrayRef<int>, int, isShuffleEquivalentImpl> isShuffleEquivalent = {};
|
||||
|
||||
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
|
||||
///
|
||||
/// This helper function produces an 8-bit shuffle immediate corresponding to
|
||||
@ -8440,6 +8469,19 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
DAG.getConstant(VPERMILPMask, MVT::i8));
|
||||
}
|
||||
|
||||
// X86 has dedicated unpack instructions that can handle specific blend
|
||||
// operations: UNPCKH and UNPCKL.
|
||||
if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
|
||||
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
|
||||
if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
|
||||
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
|
||||
// FIXME: It would be nice to find a way to get canonicalization to commute
|
||||
// these patterns.
|
||||
if (isShuffleEquivalent(Mask, 4, 0, 6, 2))
|
||||
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V2, V1);
|
||||
if (isShuffleEquivalent(Mask, 5, 1, 7, 3))
|
||||
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1);
|
||||
|
||||
// Check if the blend happens to exactly fit that of SHUFPD.
|
||||
if (Mask[0] < 4 && (Mask[1] == -1 || Mask[1] >= 4) &&
|
||||
Mask[2] < 4 && (Mask[3] == -1 || Mask[3] >= 4)) {
|
||||
|
@ -239,11 +239,35 @@ define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
|
||||
define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: @shuffle_v4f64_0426
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vshufpd {{.*}} # ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
|
||||
; AVX1-NEXT: vunpcklpd {{.*}} # ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
|
||||
; AVX1-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: @shuffle_v4f64_1537
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vunpckhpd {{.*}} # ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
|
||||
; AVX1-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: @shuffle_v4f64_4062
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vunpcklpd {{.*}} # ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
|
||||
; AVX1-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: @shuffle_v4f64_5173
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vunpckhpd {{.*}} # ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
|
||||
; AVX1-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: @shuffle_v4f64_5163
|
||||
; AVX1: # BB#0:
|
||||
|
Loading…
x
Reference in New Issue
Block a user