mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-31 10:34:17 +00:00
[DAG] Improved target independent vector shuffle folding logic.
This patch teaches the DAGCombiner how to combine shuffles according to the following rules:
  shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(B, A, M2)
  shuffle(shuffle(A, B, M0), B, M1) -> shuffle(B, A, M2)
  shuffle(shuffle(A, B, M0), A, M1) -> shuffle(B, A, M2)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222090 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
01e39346f3
commit
37f645cb34
@ -11239,6 +11239,26 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
||||
return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]);
|
||||
return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
|
||||
}
|
||||
|
||||
// Compute the commuted shuffle mask.
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
int idx = Mask[i];
|
||||
if (idx < 0)
|
||||
continue;
|
||||
else if (idx < (int)NumElts)
|
||||
Mask[i] = idx + NumElts;
|
||||
else
|
||||
Mask[i] = idx - NumElts;
|
||||
}
|
||||
|
||||
if (TLI.isShuffleMaskLegal(Mask, VT)) {
|
||||
if (IsSV1Undef)
|
||||
// shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(B, A, M2)
|
||||
return DAG.getVectorShuffle(VT, SDLoc(N), N1, SV0, &Mask[0]);
|
||||
// shuffle(shuffle(A, B, M0), B, M1) -> shuffle(B, A, M2)
|
||||
// shuffle(shuffle(A, B, M0), A, M1) -> shuffle(B, A, M2)
|
||||
return DAG.getVectorShuffle(VT, SDLoc(N), SV1, SV0, &Mask[0]);
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
@ -1146,18 +1146,14 @@ define <4 x float> @combine_test1(<4 x float> %a, <4 x float> %b) {
|
||||
define <4 x float> @combine_test2(<4 x float> %a, <4 x float> %b) {
|
||||
; SSE2-LABEL: combine_test2:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
|
||||
; SSE2-NEXT: movss %xmm0, %xmm1
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_test2:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
|
||||
; SSSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_test2:
|
||||
@ -1268,18 +1264,14 @@ define <4 x i32> @combine_test6(<4 x i32> %a, <4 x i32> %b) {
|
||||
define <4 x i32> @combine_test7(<4 x i32> %a, <4 x i32> %b) {
|
||||
; SSE2-LABEL: combine_test7:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
|
||||
; SSE2-NEXT: movss %xmm0, %xmm1
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_test7:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
|
||||
; SSSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_test7:
|
||||
@ -1385,14 +1377,12 @@ define <4 x float> @combine_test12(<4 x float> %a, <4 x float> %b) {
|
||||
; SSE2-LABEL: combine_test12:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movss %xmm0, %xmm1
|
||||
; SSE2-NEXT: movss %xmm0, %xmm1
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_test12:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -1486,14 +1476,12 @@ define <4 x i32> @combine_test17(<4 x i32> %a, <4 x i32> %b) {
|
||||
; SSE2-LABEL: combine_test17:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movss %xmm0, %xmm1
|
||||
; SSE2-NEXT: movss %xmm0, %xmm1
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_test17:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@ -1700,30 +1688,24 @@ define <4 x float> @combine_test4b(<4 x float> %a, <4 x float> %b) {
|
||||
define <4 x i8> @combine_test1c(<4 x i8>* %a, <4 x i8>* %b) {
|
||||
; SSE2-LABEL: combine_test1c:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movd (%rdi), %xmm0
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSE2-NEXT: movd (%rsi), %xmm1
|
||||
; SSE2-NEXT: movd (%rdi), %xmm1
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
|
||||
; SSE2-NEXT: movd (%rsi), %xmm0
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSE2-NEXT: movss %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_test1c:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movd (%rdi), %xmm0
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSSE3-NEXT: movd (%rsi), %xmm1
|
||||
; SSSE3-NEXT: movd (%rdi), %xmm1
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
|
||||
; SSSE3-NEXT: movd (%rsi), %xmm0
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSSE3-NEXT: movss %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_test1c:
|
||||
@ -1984,19 +1966,13 @@ define <4 x float> @combine_blend_02(<4 x float> %a, <4 x float> %b) {
|
||||
define <4 x float> @combine_blend_123(<4 x float> %a, <4 x float> %b) {
|
||||
; SSE2-LABEL: combine_blend_123:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm2
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm0[0,0]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[2,3]
|
||||
; SSE2-NEXT: movsd %xmm2, %xmm1
|
||||
; SSE2-NEXT: movss %xmm0, %xmm1
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_blend_123:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm2
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm0[0,0]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[2,3]
|
||||
; SSSE3-NEXT: movsd %xmm2, %xmm1
|
||||
; SSSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
|
Loading…
x
Reference in New Issue
Block a user