mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-11-02 07:17:36 +00:00
Reverted AVX-512 vector shuffle
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240258 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -6259,42 +6259,6 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Test whether a shuffle mask is equivalent within each 256-bit lane.
|
||||
///
|
||||
/// This checks a shuffle mask to see if it is performing the same
|
||||
/// 256-bit lane-relative shuffle in each 256-bit lane. This trivially implies
|
||||
/// that it is also not lane-crossing. It may however involve a blend from the
|
||||
/// same lane of a second vector.
|
||||
///
|
||||
/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
|
||||
/// non-trivial to compute in the face of undef lanes. The representation is
|
||||
/// *not* suitable for use with existing 256-bit shuffles as it will contain
|
||||
/// entries from both V1 and V2 inputs to the wider mask.
|
||||
///
/// \param VT Vector type; only its scalar size is read, to derive the number
///        of elements per 256-bit lane.
/// \param Mask Shuffle mask to inspect; negative entries are skipped.
/// \param RepeatedMask Output mask, resized to one lane's worth of entries.
///        Slots that no mask entry defines are left at their prior value
///        (-1 for slots newly created by the resize).
/// \returns true if every non-negative entry stays within its own lane and
///          agrees with the per-lane pattern; false otherwise.
static bool
|
||||
is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
|
||||
SmallVectorImpl<int> &RepeatedMask) {
|
||||
// Number of elements that fit in one 256-bit lane.
int LaneSize = 256 / VT.getScalarSizeInBits();
|
||||
// NOTE(review): resize() only initializes newly added slots, so this
// presumably expects RepeatedMask to be empty on entry -- confirm at callers.
RepeatedMask.resize(LaneSize, -1);
|
||||
int Size = Mask.size();
|
||||
for (int i = 0; i < Size; ++i) {
|
||||
if (Mask[i] < 0)
|
||||
continue;
|
||||
// Mask[i] % Size folds an index >= Size back onto an element position, so
// the lane of the source element can be compared with the lane of slot i.
if ((Mask[i] % Size) / LaneSize != i / LaneSize)
|
||||
// This entry crosses lanes, so there is no way to model this shuffle.
|
||||
return false;
|
||||
|
||||
// Ok, handle the in-lane shuffles by detecting if and when they repeat.
|
||||
if (RepeatedMask[i % LaneSize] == -1)
|
||||
// This is the first non-undef entry in this slot of a 256-bit lane.
|
||||
// Indices below Size are stored as a plain in-lane offset; indices at or
// above Size are stored as in-lane offset + Size.
RepeatedMask[i % LaneSize] =
|
||||
Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size;
|
||||
// Otherwise rebuild the value the stored pattern implies for this lane
// (pattern + lane base) and require it to equal the actual entry.
else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize != Mask[i])
|
||||
// Found a mismatch with the repeated mask.
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
|
||||
/// arguments.
|
||||
///
|
||||
@@ -6354,22 +6318,6 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
|
||||
return DAG.getConstant(Imm, DL, MVT::i8);
|
||||
}
|
||||
|
||||
/// \brief Get an 8-bit shuffle immediate, 1 bit per mask element, for a mask.
|
||||
///
|
||||
/// This helper function produces an 8-bit shuffle immediate corresponding to
|
||||
/// the ubiquitous shuffle encoding scheme used in x86 instructions for
|
||||
/// shuffling 8 lanes.
|
||||
///
/// Bit i of the immediate is the low bit of Mask[i] (Mask[i] % 2). Negative
/// mask entries contribute nothing, so their bit stays 0.
///
/// \param Mask Shuffle mask with at most 8 entries (asserted).
/// \param DL Location passed through to the created constant.
/// \param DAG DAG used to create the constant.
/// \returns The immediate as an MVT::i8 constant node.
static SDValue get1bitLaneShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
|
||||
SelectionDAG &DAG) {
|
||||
assert(Mask.size() <= 8 &&
|
||||
"Up to 8 elts may be in Imm8 1-bit lane shuffle mask");
|
||||
unsigned Imm = 0;
|
||||
for (unsigned i = 0; i < Mask.size(); ++i)
|
||||
// Skip negative (sentinel) entries; they leave bit i cleared.
if (Mask[i] >= 0)
|
||||
Imm |= (Mask[i] % 2) << i;
|
||||
return DAG.getConstant(Imm, DL, MVT::i8);
|
||||
}
|
||||
|
||||
/// \brief Try to emit a blend instruction for a shuffle using bit math.
|
||||
///
|
||||
/// This is used as a fallback approach when first class blend instructions are
|
||||
@@ -9385,30 +9333,6 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
|
||||
DAG.getConstant(PermMask, DL, MVT::i8));
|
||||
}
|
||||
|
||||
/// \brief Handle lowering 4-lane 128-bit shuffles.
|
||||
///
/// Builds an 8-bit immediate with two bits per entry of \p WidenedMask and
/// emits an X86ISD::SHUF128 node on (V1, V2) with it.
///
/// \param WidenedMask Four-entry mask (asserted) of 128-bit-granular
///        selections.
/// \returns The SHUF128 node, or an empty SDValue if any entry is
///          SM_SentinelZero.
static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
|
||||
SDValue V2, ArrayRef<int> WidenedMask,
|
||||
SelectionDAG &DAG) {
|
||||
|
||||
assert(WidenedMask.size() == 4 && "Unexpected mask size for 128bit shuffle!");
|
||||
// Form a 128-bit permutation.
|
||||
// Convert the 64-bit shuffle mask selection values into 128-bit selection
|
||||
// bits defined by a vshuf64x2 instruction's immediate control byte.
|
||||
unsigned PermMask = 0, Imm = 0;
|
||||
|
||||
for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
|
||||
// A zeroed lane cannot be expressed here; report failure to the caller.
if(WidenedMask[i] == SM_SentinelZero)
|
||||
return SDValue();
|
||||
|
||||
// Use the first element (index 0) in place of an undef mask entry.
|
||||
Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
|
||||
// NOTE(review): Imm % 4 keeps only the position within an input and drops
// whether the entry selected V1 or V2 -- confirm this matches how SHUF128
// picks its source operand for each destination lane.
PermMask |= (Imm % 4) << (i * 2);
|
||||
}
|
||||
|
||||
return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
|
||||
DAG.getConstant(PermMask, DL, MVT::i8));
|
||||
}
|
||||
|
||||
/// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then
|
||||
/// shuffling each lane.
|
||||
///
|
||||
@@ -10144,105 +10068,35 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Try to lower a shuffle as a single X86ISD::VALIGN node.
///
/// Succeeds when every non-negative mask entry satisfies
/// Mask[i] == i + AlignVal for one common, non-negative AlignVal.
///
/// \param VT Vector type; scalar size must be at least 32 bits (asserted).
/// \param Mask Shuffle mask; negative entries are ignored.
/// \returns The VALIGN node (operands passed as V2, V1), or an empty SDValue
///          if the mask does not fit the pattern.
static SDValue lowerVectorShuffleWithVALIGN(SDLoc DL, MVT VT,
|
||||
ArrayRef<int> Mask, SDValue V1,
|
||||
SDValue V2, SelectionDAG &DAG) {
|
||||
|
||||
assert(VT.getScalarSizeInBits() >= 32 && "Unexpected data type for VALIGN");
|
||||
// VALIGN pattern 2, 3, 4, 5, .. (sequential, shifted right)
|
||||
// -1 means "no defined mask entry seen yet".
int AlignVal = -1;
|
||||
for (int i = 0; i < (signed)VT.getVectorNumElements(); ++i) {
|
||||
if (Mask[i] < 0)
|
||||
continue;
|
||||
// A source index below the destination index would need a negative shift.
if (Mask[i] < i)
|
||||
return SDValue();
|
||||
if (AlignVal == -1)
|
||||
AlignVal = Mask[i] - i;
|
||||
else if (Mask[i] - i != AlignVal)
|
||||
return SDValue();
|
||||
}
|
||||
// NOTE(review): if every mask entry is negative, AlignVal is still -1 here
// and is emitted as the immediate -- confirm callers never pass such a mask.
// Vector source operands should be swapped
|
||||
return DAG.getNode(X86ISD::VALIGN, DL, VT, V2, V1,
|
||||
DAG.getConstant(AlignVal, DL, MVT::i8));
|
||||
}
|
||||
|
||||
/// \brief Lower a shuffle using a variable permute driven by a constant
/// index vector.
///
/// Materializes \p Mask as a BUILD_VECTOR of integer constants (undef for
/// negative entries) and emits X86ISD::VPERMV when the mask is single-input,
/// otherwise X86ISD::VPERMV3 with both inputs.
///
/// \param VT Vector type; scalar size must be at least 16 bits (asserted).
/// \param Mask Shuffle mask, read for the first VT.getVectorNumElements()
///        entries.
/// \returns The VPERMV or VPERMV3 node; this function never bails out.
static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT,
|
||||
ArrayRef<int> Mask, SDValue V1,
|
||||
SDValue V2, SelectionDAG &DAG) {
|
||||
|
||||
assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV");
|
||||
|
||||
// The index vector uses integer elements of the same width and count as VT.
MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
|
||||
MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
|
||||
|
||||
SmallVector<SDValue, 32> VPermMask;
|
||||
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i)
|
||||
// Negative mask entries become undef index elements.
VPermMask.push_back(Mask[i] < 0 ? DAG.getUNDEF(MaskEltVT) :
|
||||
DAG.getConstant(Mask[i], DL,MaskEltVT));
|
||||
SDValue MaskNode = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecVT,
|
||||
VPermMask);
|
||||
// Single-input masks only need V1; otherwise both inputs are passed.
if (isSingleInputShuffleMask(Mask))
|
||||
return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
|
||||
|
||||
return DAG.getNode(X86ISD::VPERMV3, DL, VT, MaskNode, V1, V2);
|
||||
}
|
||||
|
||||
|
||||
/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
|
||||
static SDValue lowerV8X64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
const X86Subtarget *Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
SDLoc DL(Op);
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
assert((V1.getSimpleValueType() == MVT::v8f64 ||
|
||||
V1.getSimpleValueType() == MVT::v8i64) && "Bad operand type!");
|
||||
assert((V2.getSimpleValueType() == MVT::v8f64 ||
|
||||
V2.getSimpleValueType() == MVT::v8i64) && "Bad operand type!");
|
||||
assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
|
||||
assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
|
||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
|
||||
ArrayRef<int> Mask = SVOp->getMask();
|
||||
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
|
||||
|
||||
SmallVector<int, 4> WidenedMask;
|
||||
if (canWidenShuffleElements(Mask, WidenedMask))
|
||||
if(SDValue Op = lowerV4X128VectorShuffle(DL, VT, V1, V2, WidenedMask, DAG))
|
||||
return Op;
|
||||
// X86 has dedicated unpack instructions that can handle specific blend
|
||||
// operations: UNPCKH and UNPCKL.
|
||||
if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
|
||||
return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
|
||||
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
|
||||
if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
|
||||
return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
|
||||
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
|
||||
|
||||
if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG))
|
||||
return Op;
|
||||
|
||||
if (SDValue Op = lowerVectorShuffleWithSHUFPD(DL, VT, Mask, V1, V2, DAG))
|
||||
return Op;
|
||||
|
||||
// PERMILPD instruction - mask 0/1, 0/1, 2/3, 2/3, 4/5, 4/5, 6/7, 6/7
|
||||
if (isSingleInputShuffleMask(Mask)) {
|
||||
if (!is128BitLaneCrossingShuffleMask(VT, Mask))
|
||||
return DAG.getNode(X86ISD::VPERMILPI, DL, VT, V1,
|
||||
get1bitLaneShuffleImm8ForMask(Mask, DL, DAG));
|
||||
|
||||
SmallVector<int, 4> RepeatedMask;
|
||||
if (is256BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
|
||||
return DAG.getNode(X86ISD::VPERMI, DL, VT, V1,
|
||||
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
|
||||
}
|
||||
return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG);
|
||||
// FIXME: Implement direct support for this type!
|
||||
return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
|
||||
}
|
||||
|
||||
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
|
||||
static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
|
||||
static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
const X86Subtarget *Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
SDLoc DL(Op);
|
||||
assert((V1.getSimpleValueType() == MVT::v16i32 ||
|
||||
V1.getSimpleValueType() == MVT::v16f32) && "Bad operand type!");
|
||||
assert((V2.getSimpleValueType() == MVT::v16i32 ||
|
||||
V2.getSimpleValueType() == MVT::v16f32) && "Bad operand type!");
|
||||
assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
|
||||
assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
|
||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
|
||||
ArrayRef<int> Mask = SVOp->getMask();
|
||||
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
|
||||
@@ -10253,39 +10107,67 @@ static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
0, 16, 1, 17, 4, 20, 5, 21,
|
||||
// Second 128-bit lane.
|
||||
8, 24, 9, 25, 12, 28, 13, 29}))
|
||||
return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
|
||||
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2);
|
||||
if (isShuffleEquivalent(V1, V2, Mask,
|
||||
{// First 128-bit lane.
|
||||
2, 18, 3, 19, 6, 22, 7, 23,
|
||||
// Second 128-bit lane.
|
||||
10, 26, 11, 27, 14, 30, 15, 31}))
|
||||
return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
|
||||
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2);
|
||||
|
||||
if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
|
||||
12, 12, 14, 14}))
|
||||
return DAG.getNode(X86ISD::MOVSLDUP, DL, VT, V1);
|
||||
if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11,
|
||||
13, 13, 15, 15}))
|
||||
return DAG.getNode(X86ISD::MOVSHDUP, DL, VT, V1);
|
||||
// FIXME: Implement direct support for this type!
|
||||
return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG);
|
||||
}
|
||||
|
||||
SmallVector<int, 4> RepeatedMask;
|
||||
if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) {
|
||||
if (isSingleInputShuffleMask(Mask)) {
|
||||
unsigned Opc = VT.isInteger() ? X86ISD::PSHUFD : X86ISD::VPERMILPI;
|
||||
return DAG.getNode(Opc, DL, VT, V1,
|
||||
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
|
||||
}
|
||||
/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
|
||||
static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
const X86Subtarget *Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
SDLoc DL(Op);
|
||||
assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
|
||||
assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
|
||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
|
||||
ArrayRef<int> Mask = SVOp->getMask();
|
||||
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
if (RepeatedMask[i] >= 16)
|
||||
RepeatedMask[i] -= 12;
|
||||
return lowerVectorShuffleWithSHUFPS(DL, VT, RepeatedMask, V1, V2, DAG);
|
||||
}
|
||||
// X86 has dedicated unpack instructions that can handle specific blend
|
||||
// operations: UNPCKH and UNPCKL.
|
||||
if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
|
||||
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
|
||||
if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
|
||||
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
|
||||
|
||||
if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG))
|
||||
return Op;
|
||||
// FIXME: Implement direct support for this type!
|
||||
return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG);
|
||||
}
|
||||
|
||||
return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG);
|
||||
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
|
||||
///
/// Matches the two unpack patterns directly and otherwise defers to
/// splitAndLowerVectorShuffle. \p Subtarget is not read in this body.
///
/// \param Op The shuffle node; its mask must have 16 entries (asserted).
/// \param V1 First input, must be MVT::v16i32 (asserted).
/// \param V2 Second input, must be MVT::v16i32 (asserted).
static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
const X86Subtarget *Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
SDLoc DL(Op);
|
||||
assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
|
||||
assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
|
||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
|
||||
ArrayRef<int> Mask = SVOp->getMask();
|
||||
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
|
||||
|
||||
// Use dedicated unpack instructions for masks that match their pattern.
|
||||
if (isShuffleEquivalent(V1, V2, Mask,
|
||||
{// First eight elements (256 bits, i.e. two 128-bit lanes).
|
||||
0, 16, 1, 17, 4, 20, 5, 21,
|
||||
// Last eight elements (256 bits, i.e. two 128-bit lanes).
|
||||
8, 24, 9, 25, 12, 28, 13, 29}))
|
||||
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2);
|
||||
if (isShuffleEquivalent(V1, V2, Mask,
|
||||
{// First eight elements (256 bits, i.e. two 128-bit lanes).
|
||||
2, 18, 3, 19, 6, 22, 7, 23,
|
||||
// Last eight elements (256 bits, i.e. two 128-bit lanes).
|
||||
10, 26, 11, 27, 14, 30, 15, 31}))
|
||||
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
|
||||
|
||||
// FIXME: Implement direct support for this type!
|
||||
// Fallback for every mask that is not one of the two unpack patterns above.
return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG);
|
||||
}
|
||||
|
||||
/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
|
||||
@@ -10345,11 +10227,13 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
// the requisite ISA extensions for that element type are available.
|
||||
switch (VT.SimpleTy) {
|
||||
case MVT::v8f64:
|
||||
case MVT::v8i64:
|
||||
return lowerV8X64VectorShuffle(Op, V1, V2, Subtarget, DAG);
|
||||
return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
|
||||
case MVT::v16f32:
|
||||
return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
|
||||
case MVT::v8i64:
|
||||
return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
|
||||
case MVT::v16i32:
|
||||
return lowerV16X32VectorShuffle(Op, V1, V2, Subtarget, DAG);
|
||||
return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
|
||||
case MVT::v32i16:
|
||||
if (Subtarget->hasBWI())
|
||||
return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
|
||||
|
||||
Reference in New Issue
Block a user