mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-22 10:33:23 +00:00
[x86] Expand the target DAG combining for PSHUFD nodes to be able to
combine into half-shuffles through unpack instructions that expand the
half to a whole vector without messing with the dword lanes.

This fixes some redundant instructions in splat-like lowerings for
v16i8, which are now getting to be *really* nice.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212695 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
dc90a3ab8f
commit
977aab501d
@ -18492,6 +18492,39 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
|
||||
return false;
|
||||
|
||||
continue;
|
||||
|
||||
case X86ISD::UNPCKL:
|
||||
case X86ISD::UNPCKH:
|
||||
// For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
|
||||
// shuffle into a preceding word shuffle.
|
||||
if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16)
|
||||
return false;
|
||||
|
||||
// Search for a half-shuffle which we can combine with.
|
||||
unsigned CombineOp =
|
||||
V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
|
||||
if (V.getOperand(0) != V.getOperand(1) ||
|
||||
!V->isOnlyUserOf(V.getOperand(0).getNode()))
|
||||
return false;
|
||||
V = V.getOperand(0);
|
||||
do {
|
||||
switch (V.getOpcode()) {
|
||||
default:
|
||||
return false; // Nothing to combine.
|
||||
|
||||
case X86ISD::PSHUFLW:
|
||||
case X86ISD::PSHUFHW:
|
||||
if (V.getOpcode() == CombineOp)
|
||||
break;
|
||||
|
||||
// Fallthrough!
|
||||
case ISD::BITCAST:
|
||||
V = V.getOperand(0);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
} while (V.hasOneUse());
|
||||
break;
|
||||
}
|
||||
// Break out of the loop if we break out of the switch.
|
||||
break;
|
||||
@ -18508,7 +18541,7 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
|
||||
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
|
||||
for (int &M : Mask)
|
||||
M = VMask[M];
|
||||
V = DAG.getNode(X86ISD::PSHUFD, DL, V.getValueType(), V.getOperand(0),
|
||||
V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
|
||||
getV4X86ShuffleImm8ForMask(Mask, DAG));
|
||||
|
||||
// It is possible that one of the combinable shuffles was completely absorbed
|
||||
|
@ -31,9 +31,8 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(
|
||||
; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
|
||||
; CHECK-SSE2: # BB#0:
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
|
||||
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,2,4,5,6,7]
|
||||
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
|
||||
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
|
||||
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,6,6]
|
||||
; CHECK-SSE2-NEXT: retq
|
||||
@ -59,9 +58,7 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(
|
||||
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
|
||||
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
|
||||
; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
|
||||
; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
|
||||
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
|
||||
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,6,6]
|
||||
; CHECK-SSE2-NEXT: retq
|
||||
|
Loading…
x
Reference in New Issue
Block a user