[DAGCombiner] Improve the folding of target-independent shuffles to Undef.

When combining a pair of shuffle nodes, check if the combined shuffle mask is
trivially Undef. If so, immediately fold that pair of shuffles to Undef.

The lack of checks for undef masks was the root-cause of a poor-codegen bug
in the dag combiner.

Example:
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 1, i32 6>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 6>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 3>

Before this patch, on x86 (with -mcpu=corei7) we failed to fold the entire
sequence to Undef value and therefore we generated:
  shufps $-123, %xmm1, %xmm0
  pshufd $-46, %xmm0, %xmm0

With this patch, the entire shuffle sequence is folded to Undef and no
shuffles are generated in the output assembly.

Added new test cases to test 'combine-vec-shuffle-5.ll'.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215797 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Andrea Di Biagio 2014-08-16 00:29:44 +00:00
parent 5dc48ac04a
commit 89cea3c36b
2 changed files with 207 additions and 0 deletions

View File

@ -10787,6 +10787,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
Idx = OtherSV->getMaskElt(Idx);
Mask.push_back(Idx);
}
// Check if all indices in Mask are Undef. If so, propagate Undef.
bool isUndefMask = true;
for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
isUndefMask &= Mask[i] < 0;
if (isUndefMask)
return DAG.getUNDEF(VT);
bool CommuteOperands = false;
if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
@ -10932,6 +10940,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
Mask.push_back(Idx);
}
// Check if all indices in Mask are Undef. If so, propagate Undef.
bool isUndefMask = true;
for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
isUndefMask &= Mask[i] < 0;
if (isUndefMask)
return DAG.getUNDEF(VT);
// Avoid introducing shuffles with illegal mask.
if (TLI.isShuffleMaskLegal(Mask, VT)) {
if (IsSV1Undef)

View File

@ -255,3 +255,194 @@ define <4 x i8> @test4c(<4 x i8>* %a, <4 x i8>* %b) {
; CHECK: blendps $13
; CHECK: ret
; Verify that the dag combiner correctly folds the following shuffle sequences to Undef.
; Two-shuffle chain that is entirely undef.
; %1 = [A0, A1, undef, undef] (indices 5 and 7 select the undef operand).
; %2 reads lane 2 of %1 (undef) and three lanes of its own undef operand.
define <4 x i32> @test1b(<4 x i32> %A) {
%1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
ret <4 x i32> %2
}
; CHECK-LABEL: test1b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; Two-shuffle chain that is entirely undef.
; %1 = [undef, undef, A1, undef].
; %2 reads lanes 1 and 0 of %1 (both undef) and two lanes of its undef operand.
define <4 x i32> @test2b(<4 x i32> %A) {
%1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 1, i32 6>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 6, i32 7>
ret <4 x i32> %2
}
; CHECK-LABEL: test2b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; Two-shuffle chain that is entirely undef even though the second shuffle has
; a defined second operand.
; %1 = [A0, undef, A1, undef].
; %2 selects only lanes 1 and 3 of %1 (both undef) and never reads %B.
define <4 x i32> @test3b(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
ret <4 x i32> %2
}
; CHECK-LABEL: test3b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; Two-shuffle chain that is entirely undef even though the second shuffle has
; a defined second operand.
; %1 = [undef, A1, A1, undef].
; %2 selects only lanes 0 and 3 of %1 (both undef) and never reads %B.
define <4 x i32> @test4b(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 4, i32 1, i32 1, i32 6>
%2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 0, i32 3, i32 3, i32 0>
ret <4 x i32> %2
}
; CHECK-LABEL: test4b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; Three-shuffle chain that is entirely undef.
; %1 = [A0, A1, undef, undef]; %2 reads lane 2 of %1 plus undef-operand lanes,
; so %2 is fully undef. %3 only shuffles %2 with another undef operand.
define <4 x i32> @test5b(<4 x i32> %A) {
%1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
%3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
ret <4 x i32> %3
}
; CHECK-LABEL: test5b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; Three-shuffle chain that is entirely undef.
; %1 = [undef, undef, A1, undef]; %2 reads lanes 1 and 0 of %1 plus
; undef-operand lanes, so %2 is fully undef. %3 only shuffles %2 with another
; undef operand.
define <4 x i32> @test6b(<4 x i32> %A) {
%1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 1, i32 6>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 6, i32 7>
%3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x i32> %3
}
; CHECK-LABEL: test6b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; Three-shuffle chain that is entirely undef.
; %1 = [A0, undef, A1, undef]; %2 selects only lanes 1 and 3 of %1 (both undef)
; and never reads %B, so %2 is fully undef. %3 only shuffles %2 with an undef
; operand.
define <4 x i32> @test7b(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
%3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 6>
ret <4 x i32> %3
}
; CHECK-LABEL: test7b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; Three-shuffle chain that is entirely undef although %1 is fully defined.
; %1 = [B0, A1, A1, B2].
; %2 = [B0, undef, A1, undef] (indices 4 and 6 select the undef operand).
; %3 reads only lanes 1 and 3 of %2 (both undef) and one undef-operand lane.
define <4 x i32> @test8b(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 1, i32 6>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 6>
%3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 3>
ret <4 x i32> %3
}
; CHECK-LABEL: test8b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; Three-shuffle chain that is entirely undef; the first mask already contains
; an undef index.
; %1 = [A0, A1, undef, B3].
; %2 = [B3, undef, undef, A1].
; %3 selects only lanes 2 and 1 of %2 (both undef) and never reads %A.
define <4 x i32> @test9b(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 1, i32 undef, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 4, i32 2, i32 1>
%3 = shufflevector <4 x i32> %2, <4 x i32> %A, <4 x i32> <i32 2, i32 1, i32 1, i32 2>
ret <4 x i32> %3
}
; CHECK-LABEL: test9b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; Three-shuffle chain that is entirely undef.
; %1 = [undef, undef, A1, B2].
; %2 = [undef, A2, undef, undef] (index 6 selects lane 2 of %A).
; %3 selects lanes 2, 3 and 0 of %2, skipping lane 1, the only defined one.
define <4 x i32> @test10b(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 undef, i32 undef, i32 1, i32 6>
%2 = shufflevector <4 x i32> %1, <4 x i32> %A, <4 x i32> <i32 0, i32 6, i32 1, i32 0>
%3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 2>
ret <4 x i32> %3
}
; CHECK-LABEL: test10b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; Three-shuffle chain that is entirely undef.
; %1 = [A0, undef, A1, undef] (undef comes from the mask, not from an operand).
; %2 selects only lanes 1 and 3 of %1 (both undef) and never reads %B, so %2 is
; fully undef. %3 only shuffles %2 with an undef operand.
define <4 x i32> @test11b(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
%2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
%3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 6>
ret <4 x i32> %3
}
; CHECK-LABEL: test11b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; Three-shuffle chain that is entirely undef.
; %1 = [undef, A1, A1, undef] (undef comes from the mask, not from an operand).
; %2 selects only lanes 0 and 3 of %1 (both undef) and never reads %B, so %2 is
; fully undef. %3 only shuffles %2 with an undef operand.
define <4 x i32> @test12b(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
%2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 0, i32 3, i32 3, i32 0>
%3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 5, i32 1, i32 4>
ret <4 x i32> %3
}
; CHECK-LABEL: test12b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; 8-element variant of test11b: three-shuffle chain that is entirely undef.
; %1 has undef in every odd lane: [A0, undef, A1, undef, A0, undef, A1, undef].
; %2 selects only lanes 1 and 3 of %1 and never reads %B, so %2 is fully undef.
; %3 only shuffles %2 with an undef operand.
define <8 x i32> @test13b(<8 x i32> %A, <8 x i32> %B) {
%1 = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 0, i32 undef, i32 1, i32 undef>
%2 = shufflevector <8 x i32> %1, <8 x i32> %B, <8 x i32> <i32 1, i32 3, i32 1, i32 3, i32 1, i32 3, i32 1, i32 3>
%3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 0, i32 9, i32 1, i32 10, i32 0, i32 9, i32 1, i32 10>
ret <8 x i32> %3
}
; CHECK-LABEL: test13b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; 8-element variant of test12b: three-shuffle chain that is entirely undef.
; %1 = [undef, A1, A1, undef, undef, A1, A1, undef].
; %2 selects only lanes 0 and 3 of %1 (both undef) and never reads %B, so %2 is
; fully undef. %3 only shuffles %2 with an undef operand.
define <8 x i32> @test14b(<8 x i32> %A, <8 x i32> %B) {
%1 = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 undef, i32 1, i32 1, i32 undef, i32 undef, i32 1, i32 1, i32 undef>
%2 = shufflevector <8 x i32> %1, <8 x i32> %B, <8 x i32> <i32 0, i32 3, i32 3, i32 0, i32 0, i32 3, i32 3, i32 0>
%3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 1, i32 9, i32 1, i32 8, i32 1, i32 9, i32 1, i32 8>
ret <8 x i32> %3
}
; CHECK-LABEL: test14b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; 8-element three-shuffle chain that is entirely undef.
; %1 = [A0, A1, undef, B3, A0, A1, undef, B3].
; %2 uses indices 8, 9 and 11 (undef operand) and index 2 (undef lane of %1),
; so %2 is fully undef.
; %3 selects only lane 2 of %2 or an undef mask index, and never reads %A.
define <8 x i32> @test15b(<8 x i32> %A, <8 x i32> %B) {
%1 = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 0, i32 1, i32 undef, i32 11, i32 0, i32 1, i32 undef, i32 11>
%2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 8, i32 9, i32 2, i32 11, i32 8, i32 9, i32 2, i32 11>
%3 = shufflevector <8 x i32> %2, <8 x i32> %A, <8 x i32> <i32 2, i32 2, i32 undef, i32 2, i32 2, i32 2, i32 undef, i32 2>
ret <8 x i32> %3
}
; CHECK-LABEL: test15b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret
; 8-element three-shuffle chain that is entirely undef although %2 is mostly
; defined.
; %1 = [undef, undef, A1, B2, undef, undef, A1, B2].
; %2 = [undef, A2, A1, A3, undef, A2, A1, A3] (indices 10 and 11 select %A).
; %3 reads only lanes 4 and 0 of %2 (both undef), index 9 of its undef operand,
; and an undef mask index.
define <8 x i32> @test16b(<8 x i32> %A, <8 x i32> %B) {
%1 = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 undef, i32 undef, i32 1, i32 10, i32 undef, i32 undef, i32 1, i32 10>
%2 = shufflevector <8 x i32> %1, <8 x i32> %A, <8 x i32> <i32 0, i32 10, i32 2, i32 11, i32 0, i32 10, i32 2, i32 11>
%3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 4, i32 9, i32 undef, i32 0, i32 4, i32 9, i32 undef, i32 0>
ret <8 x i32> %3
}
; CHECK-LABEL: test16b
; CHECK-NOT: blendps
; CHECK-NOT: pshufd
; CHECK-NOT: movhlps
; CHECK: ret