[x86] Teach the shuffle mask equivalence test to look through build

vectors and detect equivalent inputs.

This lets the code match unpck-style instructions when only one of the
inputs are lined up but the other input is a splat and so which lanes we
pull from doesn't matter. Today, this doesn't really happen, but just by
accident. I have a patch that normalizes how we shuffle splats, and with
that patch this will be necessary for a lot of the mask equivalence
tests to work.

I don't really know how to write a test case for this specific change
until the other change lands though.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229307 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chandler Carruth 2015-02-15 12:07:55 +00:00
parent 23b34c287f
commit 3d39845812

View File

@ -7368,13 +7368,25 @@ namespace {
/// \brief Implementation of the \c isShuffleEquivalent variadic functor.
///
/// See its documentation for details.
bool isShuffleEquivalentImpl(ArrayRef<int> Mask, ArrayRef<const int *> Args) {
bool isShuffleEquivalentImpl(SDValue V1, SDValue V2, ArrayRef<int> Mask,
ArrayRef<const int *> Args) {
if (Mask.size() != Args.size())
return false;
// If the values are build vectors, we can look through them to find
// equivalent inputs that make the shuffles equivalent.
auto *BV1 = dyn_cast<BuildVectorSDNode>(V1);
auto *BV2 = dyn_cast<BuildVectorSDNode>(V2);
for (int i = 0, e = Mask.size(); i < e; ++i) {
assert(*Args[i] >= 0 && "Arguments must be positive integers!");
if (Mask[i] != -1 && Mask[i] != *Args[i])
return false;
if (Mask[i] != -1 && Mask[i] != *Args[i]) {
auto *MaskBV = Mask[i] < e ? BV1 : BV2;
auto *ArgsBV = *Args[i] < e ? BV1 : BV2;
if (!MaskBV || !ArgsBV ||
MaskBV->getOperand(Mask[i] % e) != ArgsBV->getOperand(*Args[i] % e))
return false;
}
}
return true;
}
@ -7391,8 +7403,9 @@ bool isShuffleEquivalentImpl(ArrayRef<int> Mask, ArrayRef<const int *> Args) {
/// It returns true if the mask is exactly as wide as the argument list, and
/// each element of the mask is either -1 (signifying undef) or the value given
/// in the argument.
static const VariadicFunction1<
bool, ArrayRef<int>, int, isShuffleEquivalentImpl> isShuffleEquivalent = {};
static const VariadicFunction3<bool, SDValue, SDValue, ArrayRef<int>, int,
isShuffleEquivalentImpl> isShuffleEquivalent =
{};
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
///
@ -8448,7 +8461,7 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (isSingleInputShuffleMask(Mask)) {
// Use low duplicate instructions for masks that match their pattern.
if (Subtarget->hasSSE3())
if (isShuffleEquivalent(Mask, 0, 0))
if (isShuffleEquivalent(V1, V2, Mask, 0, 0))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, V1);
// Straight shuffle of a single input vector. Simulate this by using the
@ -8484,7 +8497,7 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// Try to use one of the special instruction patterns to handle two common
// blend patterns if a zero-blend above didn't work.
if (isShuffleEquivalent(Mask, 0, 3) || isShuffleEquivalent(Mask, 1, 3))
if (isShuffleEquivalent(V1, V2, Mask, 0, 3) || isShuffleEquivalent(V1, V2, Mask, 1, 3))
if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG))
// We can either use a special instruction to load over the low double or
// to move just the low double.
@ -8499,9 +8512,9 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, 0, 2))
if (isShuffleEquivalent(V1, V2, Mask, 0, 2))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
if (isShuffleEquivalent(Mask, 1, 3))
if (isShuffleEquivalent(V1, V2, Mask, 1, 3))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
@ -8571,9 +8584,9 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, 0, 2))
if (isShuffleEquivalent(V1, V2, Mask, 0, 2))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
if (isShuffleEquivalent(Mask, 1, 3))
if (isShuffleEquivalent(V1, V2, Mask, 1, 3))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
// Try to use byte rotation instructions.
@ -8728,9 +8741,9 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// Use even/odd duplicate instructions for masks that match their pattern.
if (Subtarget->hasSSE3()) {
if (isShuffleEquivalent(Mask, 0, 0, 2, 2))
if (isShuffleEquivalent(V1, V2, Mask, 0, 0, 2, 2))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
if (isShuffleEquivalent(Mask, 1, 1, 3, 3))
if (isShuffleEquivalent(V1, V2, Mask, 1, 1, 3, 3))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
}
@ -8773,9 +8786,9 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
}
// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
if (isShuffleEquivalent(V1, V2, Mask, 0, 4, 1, 5))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
if (isShuffleEquivalent(V1, V2, Mask, 2, 6, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
// Otherwise fall back to a SHUFPS lowering strategy.
@ -8820,9 +8833,9 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// so prevents folding a load into this instruction or making a copy.
const int UnpackLoMask[] = {0, 0, 1, 1};
const int UnpackHiMask[] = {2, 2, 3, 3};
if (isShuffleEquivalent(Mask, 0, 0, 1, 1))
if (isShuffleEquivalent(V1, V2, Mask, 0, 0, 1, 1))
Mask = UnpackLoMask;
else if (isShuffleEquivalent(Mask, 2, 2, 3, 3))
else if (isShuffleEquivalent(V1, V2, Mask, 2, 2, 3, 3))
Mask = UnpackHiMask;
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
@ -8855,9 +8868,9 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
if (isShuffleEquivalent(V1, V2, Mask, 0, 4, 1, 5))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
if (isShuffleEquivalent(V1, V2, Mask, 2, 6, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
// Try to use byte rotation instructions.
@ -8934,9 +8947,9 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
return Shift;
// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, 0, 0, 1, 1, 2, 2, 3, 3))
if (isShuffleEquivalent(V, V, Mask, 0, 0, 1, 1, 2, 2, 3, 3))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V, V);
if (isShuffleEquivalent(Mask, 4, 4, 5, 5, 6, 6, 7, 7))
if (isShuffleEquivalent(V, V, Mask, 4, 4, 5, 5, 6, 6, 7, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V, V);
// Try to use byte rotation instructions.
@ -9571,9 +9584,9 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 2, 10, 3, 11))
if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 1, 9, 2, 10, 3, 11))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V2);
if (isShuffleEquivalent(Mask, 4, 12, 5, 13, 6, 14, 7, 15))
if (isShuffleEquivalent(V1, V2, Mask, 4, 12, 5, 13, 6, 14, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2);
// Try to use byte rotation instructions.
@ -10375,15 +10388,15 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
VT.getVectorNumElements() / 2);
// Check for patterns which can be matched with a single insert of a 128-bit
// subvector.
if (isShuffleEquivalent(Mask, 0, 1, 0, 1) ||
isShuffleEquivalent(Mask, 0, 1, 4, 5)) {
if (isShuffleEquivalent(V1, V2, Mask, 0, 1, 0, 1) ||
isShuffleEquivalent(V1, V2, Mask, 0, 1, 4, 5)) {
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
Mask[2] < 4 ? V1 : V2, DAG.getIntPtrConstant(0));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
}
if (isShuffleEquivalent(Mask, 0, 1, 6, 7)) {
if (isShuffleEquivalent(V1, V2, Mask, 0, 1, 6, 7)) {
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
@ -10522,7 +10535,7 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Broadcast;
// Use low duplicate instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, 0, 0, 2, 2))
if (isShuffleEquivalent(V1, V2, Mask, 0, 0, 2, 2))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1);
if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) {
@ -10546,9 +10559,9 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
if (isShuffleEquivalent(V1, V2, Mask, 0, 4, 2, 6))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
if (isShuffleEquivalent(V1, V2, Mask, 1, 5, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
// If we have a single input to the zero element, insert that into V1 if we
@ -10652,9 +10665,9 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
}
// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
if (isShuffleEquivalent(V1, V2, Mask, 0, 4, 2, 6))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V1, V2);
if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
if (isShuffleEquivalent(V1, V2, Mask, 1, 5, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V1, V2);
}
@ -10710,9 +10723,9 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
"Repeated masks must be half the mask width!");
// Use even/odd duplicate instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, 0, 0, 2, 2, 4, 4, 6, 6))
if (isShuffleEquivalent(V1, V2, Mask, 0, 0, 2, 2, 4, 4, 6, 6))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1);
if (isShuffleEquivalent(Mask, 1, 1, 3, 3, 5, 5, 7, 7))
if (isShuffleEquivalent(V1, V2, Mask, 1, 1, 3, 3, 5, 5, 7, 7))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1);
if (isSingleInputShuffleMask(Mask))
@ -10720,9 +10733,9 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 4, 12, 5, 13))
if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 1, 9, 4, 12, 5, 13))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V1, V2);
if (isShuffleEquivalent(Mask, 2, 10, 3, 11, 6, 14, 7, 15))
if (isShuffleEquivalent(V1, V2, Mask, 2, 10, 3, 11, 6, 14, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V1, V2);
// Otherwise, fall back to a SHUFPS sequence. Here it is important that we
@ -10816,9 +10829,9 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 4, 12, 5, 13))
if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 1, 9, 4, 12, 5, 13))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V1, V2);
if (isShuffleEquivalent(Mask, 2, 10, 3, 11, 6, 14, 7, 15))
if (isShuffleEquivalent(V1, V2, Mask, 2, 10, 3, 11, 6, 14, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2);
}
@ -10882,13 +10895,13 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask,
if (isShuffleEquivalent(V1, V2, Mask,
// First 128-bit lane:
0, 16, 1, 17, 2, 18, 3, 19,
// Second 128-bit lane:
8, 24, 9, 25, 10, 26, 11, 27))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i16, V1, V2);
if (isShuffleEquivalent(Mask,
if (isShuffleEquivalent(V1, V2, Mask,
// First 128-bit lane:
4, 20, 5, 21, 6, 22, 7, 23,
// Second 128-bit lane:
@ -10972,14 +10985,14 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// Note that these are repeated 128-bit lane unpacks, not unpacks across all
// 256-bit lanes.
if (isShuffleEquivalent(
Mask,
V1, V2, Mask,
// First 128-bit lane:
0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39,
// Second 128-bit lane:
16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v32i8, V1, V2);
if (isShuffleEquivalent(
Mask,
V1, V2, Mask,
// First 128-bit lane:
8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47,
// Second 128-bit lane:
@ -11084,9 +11097,9 @@ static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
if (isShuffleEquivalent(Mask, 0, 8, 2, 10, 4, 12, 6, 14))
if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 2, 10, 4, 12, 6, 14))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
if (isShuffleEquivalent(Mask, 1, 9, 3, 11, 5, 13, 7, 15))
if (isShuffleEquivalent(V1, V2, Mask, 1, 9, 3, 11, 5, 13, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
// FIXME: Implement direct support for this type!
@ -11105,11 +11118,11 @@ static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask,
if (isShuffleEquivalent(V1, V2, Mask,
0, 16, 1, 17, 4, 20, 5, 21,
8, 24, 9, 25, 12, 28, 13, 29))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2);
if (isShuffleEquivalent(Mask,
if (isShuffleEquivalent(V1, V2, Mask,
2, 18, 3, 19, 6, 22, 7, 23,
10, 26, 11, 27, 14, 30, 15, 31))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2);
@ -11131,9 +11144,9 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
if (isShuffleEquivalent(Mask, 0, 8, 2, 10, 4, 12, 6, 14))
if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 2, 10, 4, 12, 6, 14))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
if (isShuffleEquivalent(Mask, 1, 9, 3, 11, 5, 13, 7, 15))
if (isShuffleEquivalent(V1, V2, Mask, 1, 9, 3, 11, 5, 13, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
// FIXME: Implement direct support for this type!
@ -11152,11 +11165,11 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask,
if (isShuffleEquivalent(V1, V2, Mask,
0, 16, 1, 17, 4, 20, 5, 21,
8, 24, 9, 25, 12, 28, 13, 29))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2);
if (isShuffleEquivalent(Mask,
if (isShuffleEquivalent(V1, V2, Mask,
2, 18, 3, 19, 6, 22, 7, 23,
10, 26, 11, 27, 14, 30, 15, 31))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
@ -22869,9 +22882,9 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
// We're looking for blends between FADD and FSUB nodes. We insist on these
// nodes being lined up in a specific expected pattern.
if (!(isShuffleEquivalent(Mask, 0, 3) ||
isShuffleEquivalent(Mask, 0, 5, 2, 7) ||
isShuffleEquivalent(Mask, 0, 9, 2, 11, 4, 13, 6, 15)))
if (!(isShuffleEquivalent(V1, V2, Mask, 0, 3) ||
isShuffleEquivalent(V1, V2, Mask, 0, 5, 2, 7) ||
isShuffleEquivalent(V1, V2, Mask, 0, 9, 2, 11, 4, 13, 6, 15)))
return SDValue();
// Only specific types are legal at this point, assert so we notice if and