diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll index fc4ffa6eaf2..306ef8a220f 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -1,6 +1,8 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSSE3 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" @@ -33,6 +35,12 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00( ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: pshufb %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX: # BB#0: +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } @@ -55,6 +63,11 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01( ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } @@ -78,26 +91,43 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08( ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) { -; ALL-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: -; ALL: # BB#0: -; ALL-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: +; SSE: # BB#0: +; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) { -; ALL-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: -; ALL: # BB#0: -; ALL-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; ALL-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: +; SSE: # BB#0: +; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } @@ -122,15 +152,25 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12( ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) { -; ALL-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07: -; ALL: # BB#0: -; ALL-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07: +; SSE: # BB#0: +; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } @@ -159,27 +199,44 @@ define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_0101010101010101: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) { -; ALL-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: -; ALL: # BB#0: -; ALL-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: +; SSE: # BB#0: +; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) { -; ALL-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: -; ALL: # BB#0: -; ALL-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; ALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] -; ALL-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; ALL-NEXT: movdqa %xmm1, %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: +; SSE: # BB#0: +; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] +; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] +; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } @@ -207,6 +264,11 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12( ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } @@ -237,6 +299,13 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20( ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } @@ -273,6 +342,13 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20( ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } @@ -294,6 +370,11 @@ define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; SSE41-NEXT: retq +; +; AVX-LABEL: trunc_v4i32_shuffle: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> ret <16 x i8> %shuffle } @@ -371,6 +452,14 @@ define <16 x i8> @PR20540(<8 x i8> %a) { ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: PR20540: +; AVX: # BB#0: +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle } @@ -399,6 +488,15 @@ define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz( ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: +; AVX: # BB#0: +; AVX-NEXT: vmovd %edi, %xmm0 +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq %a = insertelement <16 x i8> undef, i8 %i, i32 0 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> ret <16 x i8> %shuffle @@ -429,6 +527,15 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz( ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: +; AVX: # BB#0: +; AVX-NEXT: vmovd %edi, %xmm0 +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,0],zero,xmm1[0,0,0,0,0,0,0,0,0,0] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq %a = insertelement <16 x i8> undef, i8 %i, i32 0 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> ret <16 x i8> %shuffle @@ -459,6 +566,15 @@ define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16( ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[u,u],zero,xmm0[u,u],zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: +; AVX: # BB#0: +; AVX-NEXT: vmovd %edi, %xmm0 +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,u,u,3,u,u,6,7,8,9,10,11,12,13,14],zero +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,xmm0[u,u],zero,xmm0[u,u],zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] +; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq %a = insertelement <16 x i8> undef, i8 %i, i32 0 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> ret <16 x i8> %shuffle @@ -491,6 +607,16 @@ define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz( ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: +; AVX: # BB#0: +; AVX-NEXT: vmovd %edi, %xmm0 +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12] +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq %a = insertelement <16 x i8> undef, i8 %i, i32 3 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> ret <16 x i8> %shuffle @@ -541,6 +667,11 @@ define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14( ; SSE41: # BB#0: ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } @@ -587,6 +718,11 @@ define <16 x i8> @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14( ; SSE41: # BB#0: ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } @@ -636,6 +772,11 @@ define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00( ; SSE41: # BB#0: ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } @@ -687,6 +828,11 @@ define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16( ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } @@ -733,6 +879,11 @@ define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00( ; SSE41: # BB#0: ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } @@ -784,6 +935,11 @@ define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30( ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } @@ -805,6 +961,11 @@ define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu( ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxbq %xmm0, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxbq %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle } @@ -827,6 +988,11 @@ define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz( ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxbq %xmm0, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxbq %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle } @@ -848,6 +1014,11 @@ define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu( ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxbd %xmm0, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxbd %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle } @@ -871,6 +1042,11 @@ define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz( ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxbd %xmm0, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxbd %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle } @@ -890,6 +1066,11 @@ define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu( ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxbw %xmm0, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxbw %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle } @@ -911,6 +1092,11 @@ define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz( ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxbw %xmm0, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxbw %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle } @@ -976,6 +1162,15 @@ define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00( ; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7] +; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u] +; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX-NEXT: retq entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -983,197 +1178,9 @@ entry: } define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) { -; SSE2-LABEL: stress_test2: -; SSE2: # BB#0: # %entry -; SSE2-NEXT: pxor %xmm8, %xmm8 -; SSE2-NEXT: movdqa %xmm0, %xmm4 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm8[0],xmm4[1],xmm8[1],xmm4[2],xmm8[2],xmm4[3],xmm8[3],xmm4[4],xmm8[4],xmm4[5],xmm8[5],xmm4[6],xmm8[6],xmm4[7],xmm8[7] -; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm8[8],xmm0[9],xmm8[9],xmm0[10],xmm8[10],xmm0[11],xmm8[11],xmm0[12],xmm8[12],xmm0[13],xmm8[13],xmm0[14],xmm8[14],xmm0[15],xmm8[15] -; SSE2-NEXT: movdqa %xmm0, %xmm5 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[3,1,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,1,0,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,2,2,3] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm5[0] -; SSE2-NEXT: movdqa %xmm1, %xmm5 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm8[8],xmm5[9],xmm8[9],xmm5[10],xmm8[10],xmm5[11],xmm8[11],xmm5[12],xmm8[12],xmm5[13],xmm8[13],xmm5[14],xmm8[14],xmm5[15],xmm8[15] -; SSE2-NEXT: pshuflw {{.*#+}} xmm7 = xmm5[2,1,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm7 = xmm7[0,1,2,3,6,4,6,7] -; SSE2-NEXT: movdqa %xmm6, %xmm3 -; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,0],xmm7[2,0] -; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm7[2,3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[2,1,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[1,0,2,3,4,5,6,7] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm3[0] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,1,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,1,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[3,1,2,3] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm8[0],xmm1[1],xmm8[1],xmm1[2],xmm8[2],xmm1[3],xmm8[3],xmm1[4],xmm8[4],xmm1[5],xmm8[5],xmm1[6],xmm8[6],xmm1[7],xmm8[7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm7 = xmm0[0,3,2,3,4,5,6,7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1],xmm4[2],xmm7[2],xmm4[3],xmm7[3] -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,3,2,1] -; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,7,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,1,3,2,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,1,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm7 = xmm7[1,1,2,3,4,5,6,7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3] -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,3,2,1] -; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm3[0,3,2,1,4,5,6,7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm7[3,1,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[1,0,3,2,4,5,6,7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; SSE2-NEXT: packuswb %xmm6, %xmm4 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,1,2,0] -; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,7,6,7] -; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,3,3,4,5,6,7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,1,3,4,5,6,7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE2-NEXT: movdqa %xmm2, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7] -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,1,0,3] -; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,5,6] -; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,0],xmm0[2,0] -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,0] -; SSE2-NEXT: packuswb %xmm0, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3] -; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7] -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,2,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,4,7] -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm8[8],xmm2[9],xmm8[9],xmm2[10],xmm8[10],xmm2[11],xmm8[11],xmm2[12],xmm8[12],xmm2[13],xmm8[13],xmm2[14],xmm8[14],xmm2[15],xmm8[15] -; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,4,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,7,6,7] -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,0],xmm2[2,0] -; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm2[2,3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; SSE2-NEXT: packuswb %xmm0, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: movdqa %xmm2, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm8[0],xmm1[1],xmm8[1],xmm1[2],xmm8[2],xmm1[3],xmm8[3],xmm1[4],xmm8[4],xmm1[5],xmm8[5],xmm1[6],xmm8[6],xmm1[7],xmm8[7] -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm8[8],xmm2[9],xmm8[9],xmm2[10],xmm8[10],xmm2[11],xmm8[11],xmm2[12],xmm8[12],xmm2[13],xmm8[13],xmm2[14],xmm8[14],xmm2[15],xmm8[15] -; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] -; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,1,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,0,0,4,5,6,7] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] -; SSE2-NEXT: movdqa %xmm4, %xmm3 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm8[8],xmm3[9],xmm8[9],xmm3[10],xmm8[10],xmm3[11],xmm8[11],xmm3[12],xmm8[12],xmm3[13],xmm8[13],xmm3[14],xmm8[14],xmm3[15],xmm8[15] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm8[0],xmm4[1],xmm8[1],xmm4[2],xmm8[2],xmm4[3],xmm8[3],xmm4[4],xmm8[4],xmm4[5],xmm8[5],xmm4[6],xmm8[6],xmm4[7],xmm8[7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,1,0,1] -; SSE2-NEXT: movdqa %xmm3, %xmm5 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,1,2,1] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,6,7] -; SSE2-NEXT: movdqa %xmm2, %xmm5 -; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[3,0],xmm0[2,0] -; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,0],xmm0[2,3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,3,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0] -; SSE2-NEXT: packuswb %xmm0, %xmm0 -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm4[3,1,2,3,4,5,6,7] -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[0,0] -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[2,3] -; SSE2-NEXT: packuswb %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[2,1,2,3,4,5,6,7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,3,2,1] -; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,6,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[2,0,3,1,4,5,6,7] -; SSE2-NEXT: packuswb %xmm0, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-NEXT: retq -; -; SSSE3-LABEL: stress_test2: -; SSSE3: # BB#0: # %entry -; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[2],zero,zero,zero,xmm0[11],zero,zero,xmm0[3,4,5],zero,zero,xmm0[15,5] -; SSSE3-NEXT: movdqa %xmm1, %xmm3 -; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[13,14],zero,xmm3[0,10,5],zero,xmm3[10,10],zero,zero,zero,xmm3[14,12],zero,zero -; SSSE3-NEXT: por %xmm0, %xmm3 -; SSSE3-NEXT: movdqa %xmm2, %xmm0 -; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[1,6,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: movdqa %xmm1, %xmm4 -; SSSE3-NEXT: pshufb {{.*#+}} xmm4 = xmm4[1,12,5,9,1,5],zero,zero,xmm4[u,u,u,u,u,u,u,u] -; SSSE3-NEXT: por %xmm0, %xmm4 -; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[2],zero,zero,xmm1[6,15,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[15,8,12],zero,xmm2[13,15],zero,zero,xmm2[u,u,u,u,u,u,u,u] -; SSSE3-NEXT: por %xmm1, %xmm2 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] -; SSSE3-NEXT: movdqa %xmm2, %xmm1 -; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[6],zero,xmm1[14,14],zero,zero,xmm1[11,0,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: movdqa %xmm3, %xmm0 -; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[12],zero,zero,xmm0[1,14],zero,zero,xmm0[u,u,u,u,u,u,u,u] -; SSSE3-NEXT: por %xmm1, %xmm0 -; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = zero,xmm2[u,2,3,u,u,u,u,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[3,u],zero,zero,xmm1[u,u,u,u,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: por %xmm2, %xmm1 -; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[1,4,10,13,u,u,u,u,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSSE3-NEXT: retq -; -; SSE41-LABEL: stress_test2: -; SSE41: # BB#0: # %entry -; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[2],zero,zero,zero,xmm0[11],zero,zero,xmm0[3,4,5],zero,zero,xmm0[15,5] -; SSE41-NEXT: movdqa %xmm1, %xmm3 -; SSE41-NEXT: pshufb {{.*#+}} xmm3 = xmm3[13,14],zero,xmm3[0,10,5],zero,xmm3[10,10],zero,zero,zero,xmm3[14,12],zero,zero -; SSE41-NEXT: por %xmm0, %xmm3 -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[1,6,u,u,u,u,u,u,u,u] -; SSE41-NEXT: movdqa %xmm1, %xmm4 -; SSE41-NEXT: pshufb {{.*#+}} xmm4 = xmm4[1,12,5,9,1,5],zero,zero,xmm4[u,u,u,u,u,u,u,u] -; SSE41-NEXT: por %xmm0, %xmm4 -; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[2],zero,zero,xmm1[6,15,u,u,u,u,u,u,u,u] -; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[15,8,12],zero,xmm2[13,15],zero,zero,xmm2[u,u,u,u,u,u,u,u] -; SSE41-NEXT: por %xmm1, %xmm2 -; SSE41-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] -; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[6],zero,xmm1[14,14],zero,zero,xmm1[11,0,u,u,u,u,u,u,u,u] -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[12],zero,zero,xmm0[1,14],zero,zero,xmm0[u,u,u,u,u,u,u,u] -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: pshufb {{.*#+}} xmm2 = zero,xmm2[u,2,3,u,u,u,u,u,u,u,u,u,u,u,u] -; SSE41-NEXT: movdqa %xmm3, %xmm1 -; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[3,u],zero,zero,xmm1[u,u,u,u,u,u,u,u,u,u,u,u] -; SSE41-NEXT: por %xmm2, %xmm1 -; SSE41-NEXT: pshufb {{.*#+}} xmm3 = xmm3[1,4,10,13,u,u,u,u,u,u,u,u,u,u,u,u] -; SSE41-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE41-NEXT: retq +; Nothing interesting to test here. Just make sure we didn't crashe. +; ALL-LABEL: stress_test2: +; ALL: retq entry: %s.1.0 = shufflevector <16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i32> %s.1.1 = shufflevector <16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i32> diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll index f1ae3612f5d..6f891c12505 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v2.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" @@ -309,10 +310,15 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) { ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] ; SSE41-NEXT: retq ; -; AVX-LABEL: shuffle_v2i64_03: -; AVX: # BB#0: -; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] -; AVX-NEXT: retq +; AVX1-LABEL: shuffle_v2i64_03: +; AVX1: # BB#0: +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v2i64_03: +; AVX2: # BB#0: +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX2-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -341,10 +347,15 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: shuffle_v2i64_03_copy: -; AVX: # BB#0: -; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7] -; AVX-NEXT: retq +; AVX1-LABEL: shuffle_v2i64_03_copy: +; AVX1: # BB#0: +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v2i64_03_copy: +; AVX2: # BB#0: +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3] +; AVX2-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -490,10 +501,15 @@ define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) { ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: shuffle_v2i64_21: -; AVX: # BB#0: -; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] -; AVX-NEXT: retq +; AVX1-LABEL: shuffle_v2i64_21: +; AVX1: # BB#0: +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v2i64_21: +; AVX2: # BB#0: +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] +; AVX2-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -522,10 +538,15 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64 ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: shuffle_v2i64_21_copy: -; AVX: # BB#0: -; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7] -; AVX-NEXT: retq +; AVX1-LABEL: shuffle_v2i64_21_copy: +; AVX1: # BB#0: +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v2i64_21_copy: +; AVX2: # BB#0: +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3] +; AVX2-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll index c4c97f6b73e..080cd439476 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll index a3ca88c927b..d0c4a3f9d36 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -1,23 +1,35 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSSE3 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_01012323: -; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_01012323: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_01012323: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_67452301: -; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_67452301: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_67452301: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -38,6 +50,11 @@ define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_456789AB: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -59,40 +76,67 @@ define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_00000000: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_00004444: -; ALL: # BB#0: -; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] -; ALL-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_00004444: +; SSE: # BB#0: +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_00004444: +; AVX: # BB#0: +; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_u0u1u2u3: -; ALL: # BB#0: -; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_u0u1u2u3: +; SSE: # BB#0: +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_u0u1u2u3: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_u4u5u6u7: -; ALL: # BB#0: -; ALL-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_u4u5u6u7: +; SSE: # BB#0: +; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_u4u5u6u7: +; AVX: # BB#0: +; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_31206745: -; ALL: # BB#0: -; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] -; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_31206745: +; SSE: # BB#0: +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_31206745: +; AVX: # BB#0: +; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -113,32 +157,54 @@ define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_44440000: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_23016745: -; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_23016745: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_23016745: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_23026745: -; ALL: # BB#0: -; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] -; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_23026745: +; SSE: # BB#0: +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_23026745: +; AVX: # BB#0: +; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_23016747: -; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3] -; ALL-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_23016747: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3] +; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_23016747: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3] +; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -159,6 +225,11 @@ define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_75643120: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -180,6 +251,11 @@ define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_10545410: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -200,6 +276,11 @@ define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_54105410: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -220,6 +301,11 @@ define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_54101054: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -240,6 +326,11 @@ define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_04400440: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -260,6 +351,11 @@ define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_40044004: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -283,6 +379,11 @@ define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_26405173: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -305,6 +406,11 @@ define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_20645173: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -326,6 +432,11 @@ define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_26401375: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -349,6 +460,11 @@ define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_66751643: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -371,6 +487,11 @@ define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_60514754: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> ret <8 x i16> %shuffle } @@ -392,6 +513,11 @@ define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_00444444: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -412,6 +538,11 @@ define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_44004444: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -432,6 +563,11 @@ define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_04404444: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -452,15 +588,26 @@ define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_04400000: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_04404567: -; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_04404567: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_04404567: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -482,6 +629,11 @@ define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_0X444444: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -502,6 +654,11 @@ define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_44X04444: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -522,6 +679,11 @@ define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_X4404444: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -543,6 +705,11 @@ define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_0127XXXX: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -564,6 +731,11 @@ define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_XXXX4563: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -585,6 +757,11 @@ define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_4563XXXX: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -606,6 +783,11 @@ define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_01274563: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -627,6 +809,11 @@ define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_45630127: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -651,114 +838,186 @@ define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_37102735: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_08192a3b: -; ALL: # BB#0: -; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_08192a3b: +; SSE: # BB#0: +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_08192a3b: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_0c1d2e3f: -; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_0c1d2e3f: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_0c1d2e3f: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_4c5d6e7f: -; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_4c5d6e7f: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_4c5d6e7f: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_48596a7b: -; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_48596a7b: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_48596a7b: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_08196e7f: -; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3] -; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] -; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_08196e7f: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_08196e7f: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3] +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_0c1d6879: -; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3] -; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] -; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_0c1d6879: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_0c1d6879: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3] +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_109832ba: -; ALL: # BB#0: -; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; ALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[2,0,3,1,4,5,6,7] -; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] -; ALL-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; ALL-NEXT: movdqa %xmm1, %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_109832ba: +; SSE: # BB#0: +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[2,0,3,1,4,5,6,7] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_109832ba: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[2,0,3,1,4,5,6,7] +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_8091a2b3: -; ALL: # BB#0: -; ALL-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; ALL-NEXT: movdqa %xmm1, %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_8091a2b3: +; SSE: # BB#0: +; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_8091a2b3: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_c4d5e6f7: -; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] -; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] -; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_c4d5e6f7: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_c4d5e6f7: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_0213cedf: -; ALL: # BB#0: -; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] -; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; ALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] -; ALL-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_0213cedf: +; SSE: # BB#0: +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_0213cedf: +; AVX: # BB#0: +; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -789,6 +1048,14 @@ define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,12,13,10,11,12,13,10,11,12,13,14,15] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_443aXXXX: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] +; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,12,13,10,11,12,13,10,11,12,13,14,15] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -817,14 +1084,26 @@ define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,8,9,6,7,8,9,12,13,12,13,14,15] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_032dXXXX: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,8,9,6,7,8,9,12,13,12,13,14,15] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: shuffle_v8i16_XXXdXXXX: -; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_XXXdXXXX: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_XXXdXXXX: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,2,3,3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -853,6 +1132,13 @@ define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_012dXXXX: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -882,6 +1168,13 @@ define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_XXXXcde3: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -911,6 +1204,13 @@ define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_cde3XXXX: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -953,6 +1253,17 @@ define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3] ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_012dcde3: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,0,1] +; AVX-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; AVX-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15] +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3] +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -983,6 +1294,13 @@ define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,5,8,9,8,9,12,13,6,7] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_XXX1X579: +; AVX: # BB#0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,10,11,14,15,14,15,10,11,12,13,14,15] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,5,8,9,8,9,12,13,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1017,64 +1335,107 @@ define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,4,5,8,9,0,1] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_XX4X8acX: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,4,5,8,9,0,1] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) { -; ALL-LABEL: shuffle_v8i16_8zzzzzzz: -; ALL: # BB#0: -; ALL-NEXT: movzwl %di, %eax -; ALL-NEXT: movd %eax, %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_8zzzzzzz: +; SSE: # BB#0: +; SSE-NEXT: movzwl %di, %eax +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_8zzzzzzz: +; AVX: # BB#0: +; AVX-NEXT: movzwl %di, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: retq %a = insertelement <8 x i16> undef, i16 %i, i32 0 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) { -; ALL-LABEL: shuffle_v8i16_z8zzzzzz: -; ALL: # BB#0: -; ALL-NEXT: movzwl %di, %eax -; ALL-NEXT: movd %eax, %xmm0 -; ALL-NEXT: pslldq $2, %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_z8zzzzzz: +; SSE: # BB#0: +; SSE-NEXT: movzwl %di, %eax +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: pslldq $2, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_z8zzzzzz: +; AVX: # BB#0: +; AVX-NEXT: movzwl %di, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vpslldq $2, %xmm0, %xmm0 +; AVX-NEXT: retq %a = insertelement <8 x i16> undef, i16 %i, i32 0 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) { -; ALL-LABEL: shuffle_v8i16_zzzzz8zz: -; ALL: # BB#0: -; ALL-NEXT: movzwl %di, %eax -; ALL-NEXT: movd %eax, %xmm0 -; ALL-NEXT: pslldq $10, %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_zzzzz8zz: +; SSE: # BB#0: +; SSE-NEXT: movzwl %di, %eax +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: pslldq $10, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_zzzzz8zz: +; AVX: # BB#0: +; AVX-NEXT: movzwl %di, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vpslldq $10, %xmm0, %xmm0 +; AVX-NEXT: retq %a = insertelement <8 x i16> undef, i16 %i, i32 0 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) { -; ALL-LABEL: shuffle_v8i16_zuuzuuz8: -; ALL: # BB#0: -; ALL-NEXT: movzwl %di, %eax -; ALL-NEXT: movd %eax, %xmm0 -; ALL-NEXT: pslldq $14, %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_zuuzuuz8: +; SSE: # BB#0: +; SSE-NEXT: movzwl %di, %eax +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: pslldq $14, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_zuuzuuz8: +; AVX: # BB#0: +; AVX-NEXT: movzwl %di, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vpslldq $14, %xmm0, %xmm0 +; AVX-NEXT: retq %a = insertelement <8 x i16> undef, i16 %i, i32 0 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) { -; ALL-LABEL: shuffle_v8i16_zzBzzzzz: -; ALL: # BB#0: -; ALL-NEXT: movzwl %di, %eax -; ALL-NEXT: movd %eax, %xmm0 -; ALL-NEXT: pslldq $4, %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v8i16_zzBzzzzz: +; SSE: # BB#0: +; SSE-NEXT: movzwl %di, %eax +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: pslldq $4, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_zzBzzzzz: +; AVX: # BB#0: +; AVX-NEXT: movzwl %di, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vpslldq $4, %xmm0, %xmm0 +; AVX-NEXT: retq %a = insertelement <8 x i16> undef, i16 %i, i32 3 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> ret <8 x i16> %shuffle @@ -1106,6 +1467,11 @@ define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_def01234: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1130,6 +1496,11 @@ define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_ueuu123u: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1153,6 +1524,11 @@ define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_56701234: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1174,6 +1550,11 @@ define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_u6uu123u: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1194,6 +1575,11 @@ define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_uuuu123u: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1223,6 +1609,11 @@ define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_bcdef012: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1247,6 +1638,11 @@ define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_ucdeuu1u: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1270,6 +1666,11 @@ define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_34567012: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1291,6 +1692,11 @@ define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_u456uu1u: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1311,6 +1717,11 @@ define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) { ; SSE41: # BB#0: ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_u456uuuu: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1342,6 +1753,11 @@ define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_3456789a: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1368,6 +1784,11 @@ define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_u456uu9u: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1399,6 +1820,11 @@ define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_56789abc: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1425,6 +1851,11 @@ define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) { ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_u6uu9abu: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } @@ -1446,6 +1877,11 @@ define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) { ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxwq %xmm0, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_0uuu1uuu: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxwq %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> ret <8 x i16> %shuffle } @@ -1469,6 +1905,11 @@ define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) { ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxwq %xmm0, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_0zzz1zzz: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxwq %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> ret <8 x i16> %shuffle } @@ -1488,6 +1929,11 @@ define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) { ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxwd %xmm0, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_0u1u2u3u: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxwd %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> ret <8 x i16> %shuffle } @@ -1509,6 +1955,11 @@ define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) { ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxwd %xmm0, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v8i16_0z1z2z3z: +; AVX: # BB#0: +; AVX-NEXT: vpmovzxwd %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> ret <8 x i16> %shuffle }