[DAG] Further improve the logic in DAGCombiner that folds a pair of shuffles into a single shuffle if the resulting mask is legal.

This patch teaches the DAGCombiner how to fold shuffles according to the
following new rules:
  1. shuffle(shuffle(x, y), undef) -> x
  2. shuffle(shuffle(x, y), undef) -> y
  3. shuffle(shuffle(x, y), undef) -> shuffle(x, undef)
  4. shuffle(shuffle(x, y), undef) -> shuffle(y, undef)

The backend avoids to combine shuffles according to rules 3. and 4. if
the resulting shuffle does not have a legal mask. This is to avoid introducing
illegal shuffles that are potentially expanded into a sub-optimal sequence of
target specific dag nodes during vector legalization.

Added test case combine-vec-shuffle-2.ll to verify that we correctly triggers
the new rules when combining shuffles.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212748 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Andrea Di Biagio 2014-07-10 18:04:55 +00:00
parent 60079c18bc
commit 3996e5290a
3 changed files with 217 additions and 14 deletions

View File

@ -10677,17 +10677,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
// If this shuffle node is simply a swizzle of another shuffle node,
// and it reverses the swizzle of the previous shuffle then we can
// optimize shuffle(shuffle(x, undef), undef) -> x.
// then try to simplify it.
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
N1.getOpcode() == ISD::UNDEF) {
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
// Shuffle nodes can only reverse shuffles with a single non-undef value.
if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
return SDValue();
// The incoming shuffle must be of the same type as the result of the
// current shuffle.
assert(OtherSV->getOperand(0).getValueType() == VT &&
@ -10704,27 +10699,69 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
Idx = OtherSV->getMaskElt(Idx);
Mask.push_back(Idx);
}
bool CommuteOperands = false;
if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
// To be valid, the combine shuffle mask should only reference elements
// from one of the two vectors in input to the inner shufflevector.
bool IsValidMask = true;
for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
// See if the combined mask only reference undefs or elements coming
// from the first shufflevector operand.
IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;
if (!IsValidMask) {
IsValidMask = true;
for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
// Check that all the elements come from the second shuffle operand.
IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
CommuteOperands = IsValidMask;
}
// Early exit if the combined shuffle mask is not valid.
if (!IsValidMask)
return SDValue();
}
// See if this pair of shuffles can be safely folded according to either
// of the following rules:
// shuffle(shuffle(x, y), undef) -> x
// shuffle(shuffle(x, undef), undef) -> x
// shuffle(shuffle(x, y), undef) -> y
bool IsIdentityMask = true;
unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
// Skip Undefs.
if (Mask[i] < 0)
continue;
// The combined shuffle must map each index to itself.
IsIdentityMask = (unsigned)Mask[i] == i;
IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
}
if (IsIdentityMask)
// optimize shuffle(shuffle(x, undef), undef) -> x.
if (IsIdentityMask) {
if (CommuteOperands)
// optimize shuffle(shuffle(x, y), undef) -> y.
return OtherSV->getOperand(1);
// optimize shuffle(shuffle(x, undef), undef) -> x
// optimize shuffle(shuffle(x, y), undef) -> x
return OtherSV->getOperand(0);
}
// It may still be beneficial to combine the two shuffles if the
// resulting shuffle is legal.
// shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
if (TLI.isShuffleMaskLegal(Mask, VT))
return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
if (TLI.isShuffleMaskLegal(Mask, VT)) {
if (!CommuteOperands)
// shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
// shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
&Mask[0]);
// shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3)
return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1),
&Mask[0]);
}
}
return SDValue();

View File

@ -0,0 +1,164 @@
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
; Check that DAGCombiner correctly folds the following pairs of shuffles
; using the following rules:
; 1. shuffle(shuffle(x, y), undef) -> x
; 2. shuffle(shuffle(x, y), undef) -> y
; 3. shuffle(shuffle(x, y), undef) -> shuffle(x, undef)
; 4. shuffle(shuffle(x, y), undef) -> shuffle(undef, y)
;
; Rules 3. and 4. are used only if the resulting shuffle mask is legal.
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
ret <4 x i32> %2
}
; CHECK-LABEL: test1
; Mask: [3,0,0,1]
; CHECK: pshufd $67
; CHECK-NEXT: ret
define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
ret <4 x i32> %2
}
; CHECK-LABEL: test2
; Mask: [2,0,0,3]
; CHECK: pshufd $-62
; CHECK-NEXT: ret
define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
ret <4 x i32> %2
}
; CHECK-LABEL: test3
; Mask: [2,0,0,3]
; CHECK: pshufd $-62
; CHECK-NEXT: ret
define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 7, i32 1>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 4, i32 0, i32 3>
ret <4 x i32> %2
}
; CHECK-LABEL: test4
; Mask: [0,0,0,1]
; CHECK: pshufd $64
; CHECK-NEXT: ret
define <4 x i32> @test5(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 4, i32 3>
ret <4 x i32> %2
}
; CHECK-LABEL: test5
; Mask: [1,1]
; CHECK: movhlps
; CHECK-NEXT: ret
define <4 x i32> @test6(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 4>
ret <4 x i32> %2
}
; CHECK-LABEL: test6
; Mask: [2,0,0,0]
; CHECK: pshufd $2
; CHECK-NEXT: ret
define <4 x i32> @test7(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
ret <4 x i32> %2
}
; CHECK-LABEL: test7
; Mask: [0,2,0,2]
; CHECK: pshufd $-120
; CHECK-NEXT: ret
define <4 x i32> @test8(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
ret <4 x i32> %2
}
; CHECK-LABEL: test8
; Mask: [1,0,3,0]
; CHECK: pshufd $49
; CHECK-NEXT: ret
define <4 x i32> @test9(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 2, i32 5>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
ret <4 x i32> %2
}
; CHECK-LABEL: test9
; Mask: [1,3,0,2]
; CHECK: pshufd $-115
; CHECK-NEXT: ret
define <4 x i32> @test10(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
ret <4 x i32> %2
}
; CHECK-LABEL: test10
; Mask: [1,0,1,0]
; CHECK: pshufd $17
; CHECK-NEXT: ret
define <4 x i32> @test11(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 2, i32 5, i32 4>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 0>
ret <4 x i32> %2
}
; CHECK-LABEL: test11
; Mask: [1,0,2,1]
; CHECK: pshufd $97
; CHECK-NEXT: ret
define <4 x i32> @test12(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 0, i32 2, i32 4>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 0, i32 4>
ret <4 x i32> %2
}
; CHECK-LABEL: test12
; Mask: [0,0,0,0]
; CHECK: pshufd $0
; CHECK-NEXT: ret
; The following pair of shuffles is folded into vector %A.
define <4 x i32> @test13(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 4, i32 2, i32 6>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 0, i32 2, i32 4>
ret <4 x i32> %2
}
; CHECK-LABEL: test13
; CHECK-NOT: pshufd
; CHECK: ret
; The following pair of shuffles is folded into vector %B.
define <4 x i32> @test14(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 4, i32 1, i32 4>
ret <4 x i32> %2
}
; CHECK-LABEL: test14
; CHECK-NOT: pshufd
; CHECK: ret

View File

@ -33,7 +33,9 @@ entry:
define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
entry:
; CHECK-LABEL: shuf3:
; CHECK: shufps
; CHECK-NOT: movlhps
; CHECK-NOT: shufps
; CHECK: pshufd
%shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
%tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
%tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>