[X86] Add a check for 'isMOVHLPSMask' within method 'isShuffleMaskLegal'.

Before this change, method 'isShuffleMaskLegal' didn't know that shuffles
implementing a 'movhlps' operation were perfectly legal for SSE targets.

This patch adds the missing check for 'isMOVHLPSMask' inside method
'isShuffleMaskLegal' to fix the problem.

The reason why it is important to do this is because the DAGCombiner
conservatively avoids combining a pair of shuffles if the resulting shuffle
node has an illegal mask. Before this patch, shuffles with a MOVHLPS mask were
wrongly considered not to be legal. This was the root cause of some poor-code
generation bugs.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213137 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Andrea Di Biagio 2014-07-16 11:29:39 +00:00
parent 8fa83afb83
commit 35f6e97777
3 changed files with 35 additions and 26 deletions

View File

@ -16880,6 +16880,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
return (SVT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isMOVLMask(M, SVT) ||
isMOVHLPSMask(M, SVT) ||
isSHUFPMask(M, SVT) ||
isPSHUFDMask(M, SVT) ||
isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) ||

View File

@ -35,14 +35,10 @@ define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
ret <4 x float> %2
}
; FIXME: this should be lowered as a single movhlps. However, the backend
; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we
; end up with the sub-optimal sequence 'shufps, palignr'.
; CHECK-LABEL: test4
; Mask: [6,7,2,3]
; CHECK: shufps $94
; CHECK: palignr $8
; CHECK: ret
; CHECK: movhlps
; CHECK-NEXT: ret
define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
%1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
@ -90,13 +86,10 @@ define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
%2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
ret <4 x i32> %2
}
; FIXME: this should be lowered as a single movhlps. However, the backend thinks that
; shuffle mask [6,7,2,3] is not legal.
; CHECK-LABEL: test9
; Mask: [6,7,2,3]
; CHECK: shufps $94
; CHECK: palignr $8
; CHECK: ret
; CHECK: movhlps
; CHECK-NEXT: ret
define <4 x i32> @test10(<4 x i32> %a, <4 x i32> %b) {
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
@ -144,13 +137,9 @@ define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
ret <4 x float> %2
}
; FIXME: this should be lowered as a single movhlps. However, the backend
; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we
; end up with the sub-optimal sequence 'pshufd, palignr'.
; CHECK-LABEL: test14
; Mask: [6,7,2,3]
; CHECK: pshufd $94
; CHECK: palignr $8
; CHECK: movhlps
; CHECK: ret
define <4 x float> @test15(<4 x float> %a, <4 x float> %b) {
@ -199,13 +188,9 @@ define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
%2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
ret <4 x i32> %2
}
; FIXME: this should be lowered as a single movhlps. However, the backend
; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we
; end up with the sub-optimal sequence 'shufps, palignr'.
; CHECK-LABEL: test19
; Mask: [6,7,2,3]
; CHECK: pshufd $94
; CHECK: palignr $8
; CHECK: movhlps
; CHECK: ret
define <4 x i32> @test20(<4 x i32> %a, <4 x i32> %b) {
@ -371,3 +356,30 @@ define <4 x float> @blend_123(<4 x float> %a, <4 x float> %b) {
; CHECK: movss
; CHECK: ret
define <4 x i32> @test_movhl_1(<4 x i32> %a, <4 x i32> %b) {
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 7, i32 5, i32 3>
%2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 1, i32 0, i32 3>
ret <4 x i32> %2
}
; CHECK-LABEL: test_movhl_1
; CHECK: movhlps
; CHECK-NEXT: ret
define <4 x i32> @test_movhl_2(<4 x i32> %a, <4 x i32> %b) {
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 0, i32 3, i32 6>
%2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 3, i32 7, i32 0, i32 2>
ret <4 x i32> %2
}
; CHECK-LABEL: test_movhl_2
; CHECK: movhlps
; CHECK-NEXT: ret
define <4 x i32> @test_movhl_3(<4 x i32> %a, <4 x i32> %b) {
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 6, i32 3, i32 2>
%2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 0, i32 3, i32 2>
ret <4 x i32> %2
}
; CHECK-LABEL: test_movhl_3
; CHECK: movhlps
; CHECK-NEXT: ret

View File

@ -38,14 +38,10 @@ define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
ret <4 x float> %2
}
; FIXME: this should be lowered as a single movhlps. However, the backend
; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we
; end up with the sub-optimal sequence 'movhlps, palignr'.
; CHECK-LABEL: test4
; Mask: [6,7,2,3]
; CHECK: movhlps
; CHECK: palignr $8
; CHECK: ret
; CHECK-NEXT: ret
define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>