[x86] Fix the very broken formation of vpunpck instructions in the
target-specific shuffle DAG combines.

We were recognizing the paired shuffles backwards. This code needs to be
replaced anyway, as we have the same functionality elsewhere, but I'll do
the refactoring in a follow-up; this is the minimal fix to the behavior.

In addition to fixing miscompiles with the new vector shuffle lowering,
this also causes the canonicalization to kick in much better, selecting
the smaller encoding variants in lots of places in the new AVX path. The
result still isn't quite ideal, as we don't need both the shufpd and the
punpck instructions, but that will get fixed in a follow-up patch.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215690 91177308-0d34-0410-b5e6-96231b3b80d8
commit 886f0101a7
parent 906ae95a16
@@ -19496,7 +19496,7 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
   while (Mask.size() > 1) {
     SmallVector<int, 16> NewMask;
     for (int i = 0, e = Mask.size()/2; i < e; ++i) {
-      if (Mask[2*i] % 2 != 0 || Mask[2*i] != Mask[2*i + 1] + 1) {
+      if (Mask[2*i] % 2 != 0 || Mask[2*i] + 1 != Mask[2*i + 1]) {
         NewMask.clear();
         break;
       }
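The fix reads more clearly outside the DAG-combine plumbing. Below is a minimal standalone sketch of the pairing check (the helper name and the plain std::vector interface are illustrative, not LLVM's actual API): a shuffle mask can be widened to the next element size only when every adjacent pair has the form (even index, that index plus one), which is exactly what the corrected comparison tests. The new shuffle_v4i32_2121 test further down covers the reversed-pair case that the old comparison wrongly accepted.

#include <cassert>
#include <vector>

// Illustrative helper (not LLVM's API): true when a shuffle mask can be
// widened by pairing adjacent elements, i.e. every (Mask[2*i], Mask[2*i+1])
// pair has the form (even, even + 1).
static bool canWidenShuffleMask(const std::vector<int> &Mask) {
  for (int i = 0, e = static_cast<int>(Mask.size()) / 2; i < e; ++i)
    if (Mask[2 * i] % 2 != 0 || Mask[2 * i] + 1 != Mask[2 * i + 1])
      return false;
  return true;
}

int main() {
  // {0, 1, 4, 5} pairs up as (0,1) and (4,5): widenable.
  assert(canWidenShuffleMask({0, 1, 4, 5}));
  // {2, 1, 2, 1} does not pair up and must be rejected. The old,
  // backwards comparison (Mask[2*i] != Mask[2*i + 1] + 1) had the
  // operands swapped: it accepted pairs like (2,1) and rejected valid
  // pairs like (0,1), mis-forming vpunpck nodes.
  assert(!canWidenShuffleMask({2, 1, 2, 1}));
  return 0;
}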
@@ -156,7 +156,7 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
 
 ; AVX-LABEL: sext_16i8_to_16i16
 ; AVX: vpmovsxbw
-; AVX: vmovhlps
+; AVX: vpunpckhqdq
 ; AVX: vpmovsxbw
 ; AVX: ret
 define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-AVX1
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
@@ -60,6 +61,14 @@ define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) {
   ret <4 x i32> %shuffle
 }
 
+define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-AVX1-LABEL: @shuffle_v4i32_2121
+; CHECK-AVX1:       vpshufd {{.*}} # xmm0 = xmm0[2,1,2,1]
+; CHECK-AVX1-NEXT:  retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
+  ret <4 x i32> %shuffle
+}
+
 define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SSE2-LABEL: @shuffle_v4f32_0001
 ; CHECK-SSE2:       shufps {{.*}} # xmm0 = xmm0[0,0,0,1]
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-unknown"
 define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_0001
 ; AVX1:       # BB#0:
-; AVX1-NEXT:  vpshufd {{.*}} # xmm1 = xmm0[0,1,0,1]
+; AVX1-NEXT:  vpunpcklqdq {{.*}} # xmm1 = xmm0[0,0]
 ; AVX1-NEXT:  vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:  retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
@@ -18,7 +18,7 @@ define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:  vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:  vshufpd {{.*}} # xmm1 = xmm1[0],xmm0[0]
-; AVX1-NEXT:  vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:  vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT:  vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:  retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
@@ -41,7 +41,7 @@ define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:  vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:  vshufpd {{.*}} # xmm1 = xmm0[0],xmm1[1]
-; AVX1-NEXT:  vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:  vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT:  vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:  retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
@@ -52,7 +52,7 @@ define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_1000
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:  vpshufd {{.*}} # xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT:  vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:  vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT:  vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:  retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
@@ -63,8 +63,8 @@ define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_2200
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:  vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:  vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
-; AVX1-NEXT:  vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:  vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
+; AVX1-NEXT:  vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT:  vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:  retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
@@ -76,7 +76,7 @@ define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:  vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:  vshufpd {{.*}} # xmm0 = xmm1[1],xmm0[0]
-; AVX1-NEXT:  vpshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
+; AVX1-NEXT:  vpunpckhqdq {{.*}} # xmm1 = xmm1[1,1]
 ; AVX1-NEXT:  vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:  retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
@@ -174,7 +174,7 @@ define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_0124
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:  vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:  vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT:  vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
 ; AVX1-NEXT:  vshufpd {{.*}} # xmm1 = xmm2[0],xmm1[1]
 ; AVX1-NEXT:  vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:  retq
@@ -185,7 +185,7 @@ define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_0142
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:  vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:  vpshufd {{.*}} # xmm2 = xmm2[0,1,0,1]
+; AVX1-NEXT:  vpunpcklqdq {{.*}} # xmm2 = xmm2[0,0]
 ; AVX1-NEXT:  vshufpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
 ; AVX1-NEXT:  vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:  retq
@@ -197,7 +197,7 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:  vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:  vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
-; AVX1-NEXT:  vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT:  vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
 ; AVX1-NEXT:  vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
 ; AVX1-NEXT:  vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:  retq
@@ -209,7 +209,7 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:  vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:  vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
-; AVX1-NEXT:  vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:  vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT:  vshufpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
 ; AVX1-NEXT:  vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:  retq
@@ -229,7 +229,7 @@ define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:  vpshufd {{.*}} # xmm2 = xmm1[2,3,0,1]
 ; AVX1-NEXT:  vshufpd {{.*}} # xmm2 = xmm2[0],xmm0[1]
-; AVX1-NEXT:  vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT:  vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
 ; AVX1-NEXT:  vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
 ; AVX1-NEXT:  vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:  retq
@@ -249,7 +249,7 @@ define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:  vpshufd {{.*}} # xmm2 = xmm0[2,3,0,1]
 ; AVX1-NEXT:  vshufpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
-; AVX1-NEXT:  vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:  vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT:  vshufpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
 ; AVX1-NEXT:  vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:  retq
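Most of the test updates above follow one pattern: a broadcast of the low 64-bit lane, previously matched as vpshufd with dword mask [0,1,0,1], is now formed as vpunpcklqdq of a register with itself (xmm0[0,0]). The two produce identical bits, and the unpack form omits the shuffle immediate byte, which presumably accounts for the "smaller encoding variants" the commit message mentions. A standalone sketch of the equivalence (plain C++ modelling the two instructions, not LLVM code):

#include <array>
#include <cassert>
#include <cstdint>

// Model vpunpcklqdq: interleave the low quadwords of two sources.
static std::array<uint64_t, 2> unpcklqdq(std::array<uint64_t, 2> a,
                                         std::array<uint64_t, 2> b) {
  return {a[0], b[0]};
}

// Model vpshufd with immediate [0,1,0,1]: dword shuffle of one source.
static std::array<uint32_t, 4> pshufd_0101(std::array<uint32_t, 4> a) {
  return {a[0], a[1], a[0], a[1]};
}

int main() {
  std::array<uint32_t, 4> d{0x11111111u, 0x22222222u,
                            0x33333333u, 0x44444444u};
  std::array<uint64_t, 2> q{
      (uint64_t(d[1]) << 32) | d[0],  // low quadword (little-endian lanes)
      (uint64_t(d[3]) << 32) | d[2]}; // high quadword

  // vpunpcklqdq xmm0, xmm0 broadcasts the low quadword: xmm0[0,0].
  auto u = unpcklqdq(q, q);
  assert(u[0] == q[0] && u[1] == q[0]);

  // vpshufd with [0,1,0,1] produces the same bits via dword lanes.
  auto p = pshufd_0101(d);
  assert(((uint64_t(p[1]) << 32) | p[0]) == q[0]);
  assert(((uint64_t(p[3]) << 32) | p[2]) == q[0]);
  return 0;
}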