[x86] Add another combine that is particularly useful for the new vector

shuffle lowering: match shuffle patterns equivalent to an unpcklwd or
unpckhwd instruction.

This allows us to use generic lowering code for v8i16 shuffles and match
the unpack pattern late.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212705 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chandler Carruth 2014-07-10 11:09:29 +00:00
parent 9aa8beb9d6
commit a4e7f05a28
2 changed files with 52 additions and 3 deletions

View File

@ -18694,6 +18694,47 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
}
// Look for shuffle patterns which can be implemented as a single unpack.
// FIXME: This doesn't handle the location of the PSHUFD generically, and
// only works when we have a PSHUFD followed by two half-shuffles.
if (Mask[0] == Mask[1] && Mask[2] == Mask[3] &&
(V.getOpcode() == X86ISD::PSHUFLW ||
V.getOpcode() == X86ISD::PSHUFHW) &&
V.getOpcode() != N.getOpcode() &&
V.hasOneUse()) {
SDValue D = V.getOperand(0);
while (D.getOpcode() == ISD::BITCAST && D.hasOneUse())
D = D.getOperand(0);
if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) {
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
SmallVector<int, 4> DMask = getPSHUFShuffleMask(D);
int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
int WordMask[8];
for (int i = 0; i < 4; ++i) {
WordMask[i + NOffset] = Mask[i] + NOffset;
WordMask[i + VOffset] = VMask[i] + VOffset;
}
// Map the word mask through the DWord mask.
int MappedMask[8];
for (int i = 0; i < 8; ++i)
MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
const int UnpackLoMask[] = {0, 0, 1, 1, 2, 2, 3, 3};
const int UnpackHiMask[] = {4, 4, 5, 5, 6, 6, 7, 7};
if (std::equal(std::begin(MappedMask), std::end(MappedMask),
std::begin(UnpackLoMask)) ||
std::equal(std::begin(MappedMask), std::end(MappedMask),
std::begin(UnpackHiMask))) {
// We can replace all three shuffles with an unpack.
V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, D.getOperand(0));
DCI.AddToWorklist(V.getNode());
return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
: X86ISD::UNPCKH,
DL, MVT::v8i16, V, V);
}
}
}
break;
case X86ISD::PSHUFD:

View File

@ -44,14 +44,22 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(
; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,1,1,4,5,6,7]
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,7,7]
; CHECK-SSE2-NEXT: punpcklwd %xmm0, %xmm0
; CHECK-SSE2-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
; CHECK-SSE2-NEXT: punpckhwd %xmm0, %xmm0
; CHECK-SSE2-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
; CHECK-SSE2: # BB#0: