diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll index db7091485be..fc4ffa6eaf2 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -6,29 +6,29 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) { +; FIXME: SSE2 should look like the following: ; FIXME-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00 ; FIXME: # BB#0: ; FIXME-NEXT: punpcklbw %xmm0, %xmm0 ; FIXME-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7] ; FIXME-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1] ; FIXME-NEXT: retq -; FIXME-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00 ; -; SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00 +; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; SSE2: # BB#0: -; SSE2-NEXT: punpcklbw %xmm0, %xmm0 -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00 +; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; SSSE3: # BB#0: ; SSSE3-NEXT: pxor %xmm1, %xmm1 ; SSSE3-NEXT: pshufb %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00 +; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; SSE41: # BB#0: ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: pshufb %xmm1, %xmm0 @@ -38,217 +38,203 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00( } define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) { -; SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01 +; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01: ; SSE2: # BB#0: -; SSE2-NEXT: punpcklbw %xmm0, %xmm0 -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01 +; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01 +; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01: ; SSE41: # BB#0: -; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) { -; SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08 +; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,2,4,5,6,7] -; SSE2-NEXT: punpcklbw %xmm0, %xmm0 -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,6,6] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,2,4,5,6,7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08 +; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08 +; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08: ; SSE41: # BB#0: -; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) { -; ALL-LABEL: @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03 +; ALL-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: ; ALL: # BB#0: -; ALL-NEXT: punpcklbw %xmm0, %xmm0 -; ALL-NEXT: punpcklwd %xmm0, %xmm0 +; ALL-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; ALL-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) { -; ALL-LABEL: @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07 +; ALL-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: ; ALL: # BB#0: -; ALL-NEXT: punpcklbw %xmm0, %xmm0 -; ALL-NEXT: punpckhwd %xmm0, %xmm0 +; ALL-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; ALL-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] ; ALL-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) { -; SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12 +; SSE2-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12: ; SSE2: # BB#0: -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: punpcklbw %xmm0, %xmm0 -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,6,6] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12 +; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12 +; SSE41-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12: ; SSE41: # BB#0: -; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) { -; ALL-LABEL: @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07 +; ALL-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07: ; ALL: # BB#0: -; ALL-NEXT: punpcklbw %xmm0, %xmm0 +; ALL-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; ALL-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) { +; FIXME: SSE2 should be the following: ; FIXME-LABEL: @shuffle_v16i8_0101010101010101 ; FIXME: # BB#0: ; FIXME-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7] ; FIXME-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1] ; FIXME-NEXT: retq ; -; SSE2-LABEL: @shuffle_v16i8_0101010101010101 +; SSE2-LABEL: shuffle_v16i8_0101010101010101: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v16i8_0101010101010101 +; SSSE3-LABEL: shuffle_v16i8_0101010101010101: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_0101010101010101 +; SSE41-LABEL: shuffle_v16i8_0101010101010101: ; SSE41: # BB#0: -; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) { -; ALL-LABEL: @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23 -; ALL: punpcklbw %xmm1, %xmm0 +; ALL-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: +; ALL: # BB#0: +; ALL-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; ALL-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) { -; SSE2-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07 -; SSE2: # BB#0: -; SSE2-NEXT: punpcklbw %xmm1, %xmm1 -; SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7] -; SSE2-NEXT: punpcklbw %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSSE3-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07 -; SSSE3: # BB#0: -; SSSE3-NEXT: punpcklbw %xmm1, %xmm1 -; SSSE3-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7] -; SSSE3-NEXT: punpcklbw %xmm0, %xmm1 -; SSSE3-NEXT: movdqa %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07 -; SSE41: # BB#0: -; SSE41-NEXT: punpcklbw %xmm1, %xmm1 -; SSE41-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7] -; SSE41-NEXT: punpcklbw %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: retq +; ALL-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: +; ALL: # BB#0: +; ALL-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; ALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] +; ALL-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; ALL-NEXT: movdqa %xmm1, %xmm0 +; ALL-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) { -; SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12 +; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12: ; SSE2: # BB#0: ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: punpckhbw %xmm1, %xmm2 -; SSE2-NEXT: pshuflw {{.*}} # xmm2 = xmm2[3,2,1,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm2 = xmm2[0,1,2,3,7,6,5,4] -; SSE2-NEXT: punpcklbw %xmm1, %xmm0 -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] ; SSE2-NEXT: packuswb %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12 +; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12 +; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12: ; SSE41: # BB#0: -; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) { -; SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20 +; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: ; SSE2: # BB#0: ; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: punpcklbw %xmm2, %xmm1 -; SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4] -; SSE2-NEXT: punpcklbw %xmm2, %xmm0 -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] ; SSE2-NEXT: packuswb %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20 +; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4] -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20 +; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: ; SSE41: # BB#0: -; SSE41-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4] -; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -256,35 +242,35 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20( } define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) { -; SSE2-LABEL: @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20 +; SSE2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: ; SSE2: # BB#0: ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: punpcklbw %xmm2, %xmm3 -; SSE2-NEXT: pshufhw {{.*}} # xmm3 = xmm3[0,1,2,3,7,6,5,4] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4] ; SSE2-NEXT: movdqa %xmm0, %xmm4 -; SSE2-NEXT: punpckhbw %xmm2, %xmm4 -; SSE2-NEXT: pshuflw {{.*}} # xmm4 = xmm4[3,2,1,0,4,5,6,7] -; SSE2-NEXT: shufpd {{.*}} # xmm4 = xmm4[0],xmm3[1] -; SSE2-NEXT: punpckhbw %xmm2, %xmm1 -; SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4] -; SSE2-NEXT: punpcklbw %xmm2, %xmm0 -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7] -; SSE2-NEXT: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15] +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7] +; SSE2-NEXT: shufpd {{.*#+}} xmm4 = xmm4[0],xmm3[1] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] +; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] ; SSE2-NEXT: packuswb %xmm4, %xmm0 ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20 +; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4] -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20 +; SSE41-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: ; SSE41: # BB#0: -; SSE41-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4] -; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -292,21 +278,21 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20( } define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) { -; SSE2-LABEL: @trunc_v4i32_shuffle +; SSE2-LABEL: trunc_v4i32_shuffle: ; SSE2: # BB#0: -; SSE2-NEXT: pand +; SSE2-NEXT: pand .LCPI13_0(%rip), %xmm0 ; SSE2-NEXT: packuswb %xmm0, %xmm0 ; SSE2-NEXT: packuswb %xmm0, %xmm0 ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @trunc_v4i32_shuffle +; SSSE3-LABEL: trunc_v4i32_shuffle: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @trunc_v4i32_shuffle +; SSE41-LABEL: trunc_v4i32_shuffle: ; SSE41: # BB#0: -; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> ret <16 x i8> %shuffle @@ -315,9 +301,8 @@ define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) { define <16 x i8> @stress_test0(<16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i8> %s.0.7, <16 x i8> %s.0.8, <16 x i8> %s.0.9) { ; We don't have anything useful to check here. This generates 100s of ; instructions. Instead, just make sure we survived codegen. -; -; ALL-LABEL: @stress_test0 -; ALL: retq +; ALL-LABEL: stress_test0: +; ALL: retq entry: %s.1.4 = shufflevector <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i32> %s.1.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i32> @@ -333,11 +318,13 @@ entry: } define <16 x i8> @stress_test1(<16 x i8> %s.0.5, <16 x i8> %s.0.8, <16 x i8> %s.0.9) noinline nounwind { -; We don't have anything useful to check here. This generates tons of -; instructions. Instead, just make sure we survived codegen. +; There is nothing interesting to check about these instructions other than +; that they survive codegen. However, we actually do better and delete all of +; them because the result is 'undef'. ; -; ALL-LABEL: @stress_test1 -; ALL: retq +; ALL-LABEL: stress_test1: +; ALL: # BB#0: # %entry +; ALL-NEXT: retq entry: %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> undef, <16 x i32> %s.2.4 = shufflevector <16 x i8> undef, <16 x i8> %s.0.5, <16 x i32> @@ -357,19 +344,31 @@ entry: } define <16 x i8> @PR20540(<8 x i8> %a) { -; SSSE3-LABEL: @PR20540 +; SSE2-LABEL: PR20540: +; SSE2: # BB#0: +; SSE2-NEXT: pand .LCPI16_0(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] +; SSE2-NEXT: packuswb %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: PR20540: ; SSSE3: # BB#0: ; SSSE3-NEXT: pxor %xmm1, %xmm1 -; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0] -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @PR20540 +; SSE41-LABEL: PR20540: ; SSE41: # BB#0: ; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0] -; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> @@ -377,104 +376,315 @@ define <16 x i8> @PR20540(<8 x i8> %a) { } define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { -; SSE2-LABEL: @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz +; SSE2-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: ; SSE2: # BB#0: -; SSE2-NEXT: movzbl {{.*}}, %[[R:.*]] -; SSE2-NEXT: movd %[[R]], %xmm0 +; SSE2-NEXT: movzbl %dil, %eax +; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: +; SSSE3: # BB#0: +; SSSE3-NEXT: movd %edi, %xmm0 +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; SSSE3-NEXT: por %xmm1, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: +; SSE41: # BB#0: +; SSE41-NEXT: movd %edi, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq %a = insertelement <16 x i8> undef, i8 %i, i32 0 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { -; SSE2-LABEL: @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz +; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: ; SSE2: # BB#0: -; SSE2-NEXT: movzbl {{.*}}, %[[R:.*]] -; SSE2-NEXT: movd %[[R]], %xmm0 +; SSE2-NEXT: movzbl %dil, %eax +; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: pslldq $5, %xmm0 ; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: +; SSSE3: # BB#0: +; SSSE3-NEXT: movd %edi, %xmm0 +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,0],zero,xmm1[0,0,0,0,0,0,0,0,0,0] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; SSSE3-NEXT: por %xmm1, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: +; SSE41: # BB#0: +; SSE41-NEXT: movd %edi, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,0],zero,xmm1[0,0,0,0,0,0,0,0,0,0] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq %a = insertelement <16 x i8> undef, i8 %i, i32 0 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) { -; SSE2-LABEL: @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16 +; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: ; SSE2: # BB#0: -; SSE2-NEXT: movzbl {{.*}}, %[[R:.*]] -; SSE2-NEXT: movd %[[R]], %xmm0 +; SSE2-NEXT: movzbl %dil, %eax +; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: pslldq $15, %xmm0 ; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: +; SSSE3: # BB#0: +; SSSE3-NEXT: movd %edi, %xmm0 +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,u,u,3,u,u,6,7,8,9,10,11,12,13,14],zero +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[u,u],zero,xmm0[u,u],zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] +; SSSE3-NEXT: por %xmm1, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: +; SSE41: # BB#0: +; SSE41-NEXT: movd %edi, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,u,u,3,u,u,6,7,8,9,10,11,12,13,14],zero +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[u,u],zero,xmm0[u,u],zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq %a = insertelement <16 x i8> undef, i8 %i, i32 0 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { -; SSE2-LABEL: @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz +; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: ; SSE2: # BB#0: -; SSE2-NEXT: movzbl {{.*}}, %[[R:.*]] -; SSE2-NEXT: movd %[[R]], %xmm0 +; SSE2-NEXT: movzbl %dil, %eax +; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: pslldq $2, %xmm0 ; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: +; SSSE3: # BB#0: +; SSSE3-NEXT: movd %edi, %xmm0 +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12] +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; SSSE3-NEXT: por %xmm1, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: +; SSE41: # BB#0: +; SSE41-NEXT: movd %edi, %xmm0 +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12] +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq %a = insertelement <16 x i8> undef, i8 %i, i32 3 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) { -; SSSE3-LABEL: @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14 +; SSE2-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[3,1,2,0] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,0,1,2,4,5,6,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm5[0,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[1,2,3,0,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: packuswb %xmm3, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $15, {{.*}} # xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14 +; SSE41-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: ; SSE41: # BB#0: -; SSE41-NEXT: palignr $15, {{.*}} # xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) { -; SSSE3-LABEL: @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14 +; SSE2-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,0,1,2,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[3,1,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[1,2,3,0,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,1,2,0] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,0,1,2,4,5,6,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[0,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,2,3,0,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: packuswb %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $15, {{.*}} # xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14 +; SSE41-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: ; SSE41: # BB#0: -; SSE41-NEXT: palignr $15, {{.*}} # xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(<16 x i8> %a, <16 x i8> %b) { -; SSSE3-LABEL: @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00 +; SSE2-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[3,1,2,0] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm2[0,2,3,1,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm4[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,2,3,0,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,4,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,2,3,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,6,7,7] +; SSE2-NEXT: pand .LCPI23_0(%rip), %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,4] +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE2-NEXT: packuswb %xmm1, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $1, {{.*}} # xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00 +; SSE41-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: ; SSE41: # BB#0: -; SSE41-NEXT: palignr $1, {{.*}} # xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b) { -; SSSE3-LABEL: @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16 +; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[3,1,2,0] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm2[0,2,3,1,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm4[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,2,3,0,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,7] +; SSE2-NEXT: pand .LCPI24_0(%rip), %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,4] +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: packuswb %xmm0, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $1, {{.*}} # xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] +; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16 +; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: ; SSE41: # BB#0: -; SSE41-NEXT: palignr $1, {{.*}} # xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] +; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -482,29 +692,96 @@ define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16( } define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i8> %a, <16 x i8> %b) { -; SSSE3-LABEL: @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00 +; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1] +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,3,1,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[3,1,2,0] +; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[1,2,3,0,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,0] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,3,1,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: packuswb %xmm3, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $1, {{.*}} # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00 +; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: ; SSE41: # BB#0: -; SSE41-NEXT: palignr $1, {{.*}} # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle } define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<16 x i8> %a, <16 x i8> %b) { -; SSSE3-LABEL: @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30 +; SSE2-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[3,1,2,0] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,0,1,2,4,5,6,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm5[0,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[1,2,3,0,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,1,2,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,4,5,6] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: packuswb %xmm3, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $15, {{.*}} # xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] +; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30 +; SSE41-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: ; SSE41: # BB#0: -; SSE41-NEXT: palignr $15, {{.*}} # xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] +; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -512,19 +789,19 @@ define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30( } define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) { -; SSE2-LABEL: @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu +; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: ; SSE2: # BB#0: -; SSE2-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3] -; SSE2-NEXT: punpckldq {{.*}} # xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu +; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu +; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxbq %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -533,20 +810,20 @@ define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu( } define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) { -; SSE2-LABEL: @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz +; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: ; SSE2: # BB#0: -; SSE2-NEXT: pxor %[[X:xmm[0-9]+]], %[[X]] -; SSE2-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1],xmm0[2],[[X]][2],xmm0[3],[[X]][3],xmm0[4],[[X]][4],xmm0[5],[[X]][5],xmm0[6],[[X]][6],xmm0[7],[[X]][7] -; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1],xmm0[2],[[X]][2],xmm0[3],[[X]][3] -; SSE2-NEXT: punpckldq {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1] +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz +; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz +; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxbq %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -555,19 +832,19 @@ define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz( } define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu(<16 x i8> %a) { -; SSE2-LABEL: @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu +; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: ; SSE2: # BB#0: -; SSE2-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu +; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: ; SSSE3: # BB#0: -; SSSE3-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu +; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxbd %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -576,21 +853,21 @@ define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu( } define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz(<16 x i8> %a) { -; SSE2-LABEL: @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz +; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: ; SSE2: # BB#0: -; SSE2-NEXT: pxor %[[X:xmm[0-9]+]], %[[X]] -; SSE2-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1],xmm0[2],[[X]][2],xmm0[3],[[X]][3],xmm0[4],[[X]][4],xmm0[5],[[X]][5],xmm0[6],[[X]][6],xmm0[7],[[X]][7] -; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1],xmm0[2],[[X]][2],xmm0[3],[[X]][3] +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz +; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: ; SSSE3: # BB#0: -; SSSE3-NEXT: pxor %[[X1:xmm[0-9]+]], %[[X1]] -; SSSE3-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3],xmm0[4],[[X1]][4],xmm0[5],[[X1]][5],xmm0[6],[[X1]][6],xmm0[7],[[X1]][7] -; SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3] +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz +; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxbd %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -599,17 +876,17 @@ define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz( } define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu(<16 x i8> %a) { -; SSE2-LABEL: @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu +; SSE2-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: ; SSE2: # BB#0: -; SSE2-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu +; SSSE3-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: ; SSSE3: # BB#0: -; SSSE3-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu +; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxbw %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -618,19 +895,19 @@ define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu( } define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz(<16 x i8> %a) { -; SSE2-LABEL: @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz +; SSE2-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: ; SSE2: # BB#0: -; SSE2-NEXT: pxor %[[X1:xmm[0-9]+]], %[[X1]] -; SSE2-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3],xmm0[4],[[X1]][4],xmm0[5],[[X1]][5],xmm0[6],[[X1]][6],xmm0[7],[[X1]][7] +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz +; SSSE3-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: ; SSSE3: # BB#0: -; SSSE3-NEXT: pxor %[[X1:xmm[0-9]+]], %[[X1]] -; SSSE3-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3],xmm0[4],[[X1]][4],xmm0[5],[[X1]][5],xmm0[6],[[X1]][6],xmm0[7],[[X1]][7] +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz +; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxbw %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -639,25 +916,64 @@ define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz( } define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(<16 x i8> %a, <16 x i8> %b) { -; SSSE3-LABEL: @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00 -; SSSE3: # BB#0: -; SSSE3-NEXT: movdqa %xmm0, %[[X:xmm[0-9]+]] -; SSSE3-NEXT: pshufb {{.*}} # [[X]] = [[X]][2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: pshufb {{.*}} # xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7] -; SSSE3-NEXT: punpcklbw {{.*}} # xmm1 = xmm1[0],[[X]][0],xmm1[1],[[X]][1],xmm1[2],[[X]][2],xmm1[3],[[X]][3],xmm1[4],[[X]][4],xmm1[5],[[X]][5],xmm1[6],[[X]][6],xmm1[7],[[X]][7] -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: punpcklbw {{.*}} # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,0,3,3,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[2,0,3,1,4,5,6,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm3[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,3,2,3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,3,1,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] +; SSE2-NEXT: packuswb %xmm0, %xmm4 +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,3,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7] +; SSE2-NEXT: packuswb %xmm0, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: movdqa %xmm0, %xmm2 +; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00 -; SSE41: # BB#0: -; SSE41-NEXT: movdqa %xmm0, %[[X:xmm[0-9]+]] -; SSE41-NEXT: pshufb {{.*}} # [[X]] = [[X]][2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u] -; SSE41-NEXT: pshufb {{.*}} # xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE41-NEXT: punpcklbw {{.*}} # xmm1 = xmm1[0],[[X]][0],xmm1[1],[[X]][1],xmm1[2],[[X]][2],xmm1[3],[[X]][3],xmm1[4],[[X]][4],xmm1[5],[[X]][5],xmm1[6],[[X]][6],xmm1[7],[[X]][7] -; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u] -; SSE41-NEXT: punpcklbw {{.*}} # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE41-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u] +; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq entry: @@ -667,34 +983,197 @@ entry: } define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) { -; SSSE3-LABEL: @stress_test2 -; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = zero,zero,xmm0[2],zero,zero,zero,xmm0[11],zero,zero,xmm0[3,4,5],zero,zero,xmm0[15,5] -; SSSE3-NEXT: movdqa %xmm1, %[[X1:xmm[0-9]+]] -; SSSE3-NEXT: pshufb {{.*}} # [[X1]] = [[X1]][13,14],zero,[[X1]][0,10,5],zero,[[X1]][10,10],zero,zero,zero,[[X1]][14,12],zero,zero -; SSSE3-NEXT: por %xmm0, %[[X1]] -; SSSE3-NEXT: movdqa %xmm2, %xmm0 -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = zero,zero,zero,zero,zero,zero,xmm0[1,6,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: movdqa %xmm1, %[[X2:xmm[0-9]+]] -; SSSE3-NEXT: pshufb {{.*}} # [[X2]] = [[X2]][1,12,5,9,1,5],zero,zero,[[X2]][u,u,u,u,u,u,u,u] -; SSSE3-NEXT: por %xmm0, %[[X2]] -; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,xmm1[2],zero,zero,xmm1[6,15,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: pshufb {{.*}} # xmm2 = xmm2[15,8,12],zero,xmm2[13,15],zero,zero,xmm2[u,u,u,u,u,u,u,u] -; SSSE3-NEXT: por %xmm1, %xmm2 -; SSSE3-NEXT: punpcklbw {{.*}} # xmm2 = xmm2[0],[[X2]][0],xmm2[1],[[X2]][1],xmm2[2],[[X2]][2],xmm2[3],[[X2]][3],xmm2[4],[[X2]][4],xmm2[5],[[X2]][5],xmm2[6],[[X2]][6],xmm2[7],[[X2]][7] -; SSSE3-NEXT: movdqa %xmm2, %[[X3:xmm[0-9]+]] -; SSSE3-NEXT: pshufb {{.*}} # [[X3]] = [[X3]][6],zero,[[X3]][14,14],zero,zero,[[X3]][11,0,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: movdqa %[[X1]], %xmm0 -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = zero,xmm0[12],zero,zero,xmm0[1,14],zero,zero,xmm0[u,u,u,u,u,u,u,u] -; SSSE3-NEXT: por %[[X3]], %xmm0 -; SSSE3-NEXT: pshufb {{.*}} # xmm2 = zero,xmm2[u,2,3,u,u,u,u,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: movdqa %[[X1]], %[[X3]] -; SSSE3-NEXT: pshufb {{.*}} # [[X3]] = [[X3]][3,u],zero,zero,[[X3]][u,u,u,u,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: por %xmm2, %[[X3]] -; SSSE3-NEXT: pshufb {{.*}} # [[X1]] = [[X1]][1,4,10,13,u,u,u,u,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: punpcklbw {{.*}} # [[X1]] = [[X1]][0],[[X3]][0],[[X1]][1],[[X3]][1],[[X1]][2],[[X3]][2],[[X1]][3],[[X3]][3],[[X1]][4],[[X3]][4],[[X1]][5],[[X3]][5],[[X1]][6],[[X3]][6],[[X1]][7],[[X3]][7] -; SSSE3-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3],xmm0[4],[[X1]][4],xmm0[5],[[X1]][5],xmm0[6],[[X1]][6],xmm0[7],[[X1]][7] +; SSE2-LABEL: stress_test2: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: pxor %xmm8, %xmm8 +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm8[0],xmm4[1],xmm8[1],xmm4[2],xmm8[2],xmm4[3],xmm8[3],xmm4[4],xmm8[4],xmm4[5],xmm8[5],xmm4[6],xmm8[6],xmm4[7],xmm8[7] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm8[8],xmm0[9],xmm8[9],xmm0[10],xmm8[10],xmm0[11],xmm8[11],xmm0[12],xmm8[12],xmm0[13],xmm8[13],xmm0[14],xmm8[14],xmm0[15],xmm8[15] +; SSE2-NEXT: movdqa %xmm0, %xmm5 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[3,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,1,0,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,2,2,3] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm5[0] +; SSE2-NEXT: movdqa %xmm1, %xmm5 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm8[8],xmm5[9],xmm8[9],xmm5[10],xmm8[10],xmm5[11],xmm8[11],xmm5[12],xmm8[12],xmm5[13],xmm8[13],xmm5[14],xmm8[14],xmm5[15],xmm8[15] +; SSE2-NEXT: pshuflw {{.*#+}} xmm7 = xmm5[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm7 = xmm7[0,1,2,3,6,4,6,7] +; SSE2-NEXT: movdqa %xmm6, %xmm3 +; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,0],xmm7[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm7[2,3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[1,0,2,3,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm3[0] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,1,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[3,1,2,3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm8[0],xmm1[1],xmm8[1],xmm1[2],xmm8[2],xmm1[3],xmm8[3],xmm1[4],xmm8[4],xmm1[5],xmm8[5],xmm1[6],xmm8[6],xmm1[7],xmm8[7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm7 = xmm0[0,3,2,3,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1],xmm4[2],xmm7[2],xmm4[3],xmm7[3] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,3,2,1] +; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,1,3,2,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm7 = xmm7[1,1,2,3,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3] +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,3,2,1] +; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm3[0,3,2,1,4,5,6,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm7[3,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[1,0,3,2,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; SSE2-NEXT: packuswb %xmm6, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,1,2,0] +; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,7,6,7] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,3,3,4,5,6,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,1,3,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,1,0,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,5,6] +; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,0],xmm0[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,0] +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,2,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,4,7] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm8[8],xmm2[9],xmm8[9],xmm2[10],xmm8[10],xmm2[11],xmm8[11],xmm2[12],xmm8[12],xmm2[13],xmm8[13],xmm2[14],xmm8[14],xmm2[15],xmm8[15] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,4,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,0,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,7,6,7] +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,0],xmm2[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm2[2,3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSE2-NEXT: packuswb %xmm0, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm8[0],xmm1[1],xmm8[1],xmm1[2],xmm8[2],xmm1[3],xmm8[3],xmm1[4],xmm8[4],xmm1[5],xmm8[5],xmm1[6],xmm8[6],xmm1[7],xmm8[7] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm8[8],xmm2[9],xmm8[9],xmm2[10],xmm8[10],xmm2[11],xmm8[11],xmm2[12],xmm8[12],xmm2[13],xmm8[13],xmm2[14],xmm8[14],xmm2[15],xmm8[15] +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,0,0,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] +; SSE2-NEXT: movdqa %xmm4, %xmm3 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm8[8],xmm3[9],xmm8[9],xmm3[10],xmm8[10],xmm3[11],xmm8[11],xmm3[12],xmm8[12],xmm3[13],xmm8[13],xmm3[14],xmm8[14],xmm3[15],xmm8[15] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm8[0],xmm4[1],xmm8[1],xmm4[2],xmm8[2],xmm4[3],xmm8[3],xmm4[4],xmm8[4],xmm4[5],xmm8[5],xmm4[6],xmm8[6],xmm4[7],xmm8[7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,1,0,1] +; SSE2-NEXT: movdqa %xmm3, %xmm5 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,1,2,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,6,7] +; SSE2-NEXT: movdqa %xmm2, %xmm5 +; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[3,0],xmm0[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,0],xmm0[2,3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0] +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm4[3,1,2,3,4,5,6,7] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[0,0] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[2,3] +; SSE2-NEXT: packuswb %xmm0, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[2,1,2,3,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,3,2,1] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,6,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[2,0,3,1,4,5,6,7] +; SSE2-NEXT: packuswb %xmm0, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: stress_test2: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[2],zero,zero,zero,xmm0[11],zero,zero,xmm0[3,4,5],zero,zero,xmm0[15,5] +; SSSE3-NEXT: movdqa %xmm1, %xmm3 +; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[13,14],zero,xmm3[0,10,5],zero,xmm3[10,10],zero,zero,zero,xmm3[14,12],zero,zero +; SSSE3-NEXT: por %xmm0, %xmm3 +; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[1,6,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: movdqa %xmm1, %xmm4 +; SSSE3-NEXT: pshufb {{.*#+}} xmm4 = xmm4[1,12,5,9,1,5],zero,zero,xmm4[u,u,u,u,u,u,u,u] +; SSSE3-NEXT: por %xmm0, %xmm4 +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[2],zero,zero,xmm1[6,15,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[15,8,12],zero,xmm2[13,15],zero,zero,xmm2[u,u,u,u,u,u,u,u] +; SSSE3-NEXT: por %xmm1, %xmm2 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] +; SSSE3-NEXT: movdqa %xmm2, %xmm1 +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[6],zero,xmm1[14,14],zero,zero,xmm1[11,0,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: movdqa %xmm3, %xmm0 +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[12],zero,zero,xmm0[1,14],zero,zero,xmm0[u,u,u,u,u,u,u,u] +; SSSE3-NEXT: por %xmm1, %xmm0 +; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = zero,xmm2[u,2,3,u,u,u,u,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: movdqa %xmm3, %xmm1 +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[3,u],zero,zero,xmm1[u,u,u,u,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: por %xmm2, %xmm1 +; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[1,4,10,13,u,u,u,u,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: stress_test2: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[2],zero,zero,zero,xmm0[11],zero,zero,xmm0[3,4,5],zero,zero,xmm0[15,5] +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: pshufb {{.*#+}} xmm3 = xmm3[13,14],zero,xmm3[0,10,5],zero,xmm3[10,10],zero,zero,zero,xmm3[14,12],zero,zero +; SSE41-NEXT: por %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[1,6,u,u,u,u,u,u,u,u] +; SSE41-NEXT: movdqa %xmm1, %xmm4 +; SSE41-NEXT: pshufb {{.*#+}} xmm4 = xmm4[1,12,5,9,1,5],zero,zero,xmm4[u,u,u,u,u,u,u,u] +; SSE41-NEXT: por %xmm0, %xmm4 +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[2],zero,zero,xmm1[6,15,u,u,u,u,u,u,u,u] +; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[15,8,12],zero,xmm2[13,15],zero,zero,xmm2[u,u,u,u,u,u,u,u] +; SSE41-NEXT: por %xmm1, %xmm2 +; SSE41-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] +; SSE41-NEXT: movdqa %xmm2, %xmm1 +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[6],zero,xmm1[14,14],zero,zero,xmm1[11,0,u,u,u,u,u,u,u,u] +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[12],zero,zero,xmm0[1,14],zero,zero,xmm0[u,u,u,u,u,u,u,u] +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: pshufb {{.*#+}} xmm2 = zero,xmm2[u,2,3,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE41-NEXT: movdqa %xmm3, %xmm1 +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[3,u],zero,zero,xmm1[u,u,u,u,u,u,u,u,u,u,u,u] +; SSE41-NEXT: por %xmm2, %xmm1 +; SSE41-NEXT: pshufb {{.*#+}} xmm3 = xmm3[1,4,10,13,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE41-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] +; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] +; SSE41-NEXT: retq entry: %s.1.0 = shufflevector <16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i32> %s.1.1 = shufflevector <16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i32> diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll index 430a0f1eb2c..f1ae3612f5d 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v2.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -8,429 +8,613 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) { -; ALL-LABEL: @shuffle_v2i64_00 -; ALL: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v2i64_00: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_00: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) { -; ALL-LABEL: @shuffle_v2i64_10 -; ALL: pshufd {{.*}} # xmm0 = xmm0[2,3,0,1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v2i64_10: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_10: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) { -; ALL-LABEL: @shuffle_v2i64_11 -; ALL: pshufd {{.*}} # xmm0 = xmm0[2,3,2,3] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v2i64_11: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_11: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) { -; ALL-LABEL: @shuffle_v2i64_22 -; ALL: pshufd {{.*}} # xmm0 = xmm1[0,1,0,1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v2i64_22: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_22: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) { -; ALL-LABEL: @shuffle_v2i64_32 -; ALL: pshufd {{.*}} # xmm0 = xmm1[2,3,0,1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v2i64_32: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_32: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) { -; ALL-LABEL: @shuffle_v2i64_33 -; ALL: pshufd {{.*}} # xmm0 = xmm1[2,3,2,3] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v2i64_33: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_33: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) { -; SSE2-LABEL: @shuffle_v2f64_00 -; SSE2: movlhps {{.*}} # xmm0 = xmm0[0,0] +; SSE2-LABEL: shuffle_v2f64_00: +; SSE2: # BB#0: +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v2f64_00 -; SSE3: unpcklpd {{.*}} # xmm0 = xmm0[0,0] +; SSE3-LABEL: shuffle_v2f64_00: +; SSE3: # BB#0: +; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v2f64_00 -; SSSE3: unpcklpd {{.*}} # xmm0 = xmm0[0,0] +; SSSE3-LABEL: shuffle_v2f64_00: +; SSSE3: # BB#0: +; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v2f64_00 -; SSE41: unpcklpd {{.*}} # xmm0 = xmm0[0,0] +; SSE41-LABEL: shuffle_v2f64_00: +; SSE41: # BB#0: +; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v2f64_00: +; AVX: # BB#0: +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] +; AVX-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle } define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: @shuffle_v2f64_10 -; SSE: shufpd {{.*}} # xmm0 = xmm0[1,0] +; SSE-LABEL: shuffle_v2f64_10: +; SSE: # BB#0: +; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v2f64_10 -; AVX: vpermilpd {{.*}} # xmm0 = xmm0[1,0] +; AVX-LABEL: shuffle_v2f64_10: +; AVX: # BB#0: +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle } define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) { -; ALL-LABEL: @shuffle_v2f64_11 -; ALL: movhlps {{.*}} # xmm0 = xmm0[1,1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v2f64_11: +; SSE: # BB#0: +; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v2f64_11: +; AVX: # BB#0: +; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1] +; AVX-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle } define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) { -; SSE2-LABEL: @shuffle_v2f64_22 -; SSE2: movlhps {{.*}} # xmm1 = xmm1[0,0] +; SSE2-LABEL: shuffle_v2f64_22: +; SSE2: # BB#0: +; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v2f64_22 -; SSE3: unpcklpd {{.*}} # xmm1 = xmm1[0,0] +; SSE3-LABEL: shuffle_v2f64_22: +; SSE3: # BB#0: +; SSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0] ; SSE3-NEXT: movapd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v2f64_22 -; SSSE3: unpcklpd {{.*}} # xmm1 = xmm1[0,0] +; SSSE3-LABEL: shuffle_v2f64_22: +; SSSE3: # BB#0: +; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0] ; SSSE3-NEXT: movapd %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v2f64_22 -; SSE41: unpcklpd {{.*}} # xmm1 = xmm1[0,0] +; SSE41-LABEL: shuffle_v2f64_22: +; SSE41: # BB#0: +; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0] ; SSE41-NEXT: movapd %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v2f64_22: +; AVX: # BB#0: +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0] +; AVX-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle } define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: @shuffle_v2f64_32 -; SSE: pshufd {{.*}} # xmm0 = xmm1[2,3,0,1] +; SSE-LABEL: shuffle_v2f64_32: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v2f64_32 -; AVX: vpermilpd {{.*}} # xmm0 = xmm1[1,0] +; AVX-LABEL: shuffle_v2f64_32: +; AVX: # BB#0: +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0] ; AVX-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle } define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: @shuffle_v2f64_33 -; SSE: movhlps {{.*}} # xmm1 = xmm1[1,1] +; SSE-LABEL: shuffle_v2f64_33: +; SSE: # BB#0: +; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v2f64_33 -; AVX: vmovhlps {{.*}} # xmm0 = xmm1[1,1] +; AVX-LABEL: shuffle_v2f64_33: +; AVX: # BB#0: +; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1] ; AVX-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle } define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) { -; SSE2-LABEL: @shuffle_v2f64_03 -; SSE2: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; SSE2-LABEL: shuffle_v2f64_03: +; SSE2: # BB#0: +; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v2f64_03 -; SSE3: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; SSE3-LABEL: shuffle_v2f64_03: +; SSE3: # BB#0: +; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v2f64_03 -; SSSE3: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; SSSE3-LABEL: shuffle_v2f64_03: +; SSSE3: # BB#0: +; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v2f64_03 -; SSE41: blendpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; SSE41-LABEL: shuffle_v2f64_03: +; SSE41: # BB#0: +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v2f64_03: +; AVX: # BB#0: +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle } define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) { -; SSE2-LABEL: @shuffle_v2f64_21 -; SSE2: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1] +; SSE2-LABEL: shuffle_v2f64_21: +; SSE2: # BB#0: +; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1] ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v2f64_21 -; SSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1] +; SSE3-LABEL: shuffle_v2f64_21: +; SSE3: # BB#0: +; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1] ; SSE3-NEXT: movapd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v2f64_21 -; SSSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1] +; SSSE3-LABEL: shuffle_v2f64_21: +; SSSE3: # BB#0: +; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1] ; SSSE3-NEXT: movapd %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v2f64_21 -; SSE41: blendpd {{.*}} # xmm1 = xmm1[0],xmm0[1] +; SSE41-LABEL: shuffle_v2f64_21: +; SSE41: # BB#0: +; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1] ; SSE41-NEXT: movapd %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v2f64_21: +; AVX: # BB#0: +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle } define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) { -; ALL-LABEL: @shuffle_v2i64_02 -; ALL: punpcklqdq {{.*}} # xmm0 = xmm0[0],xmm1[0] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v2i64_02: +; SSE: # BB#0: +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_02: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { -; SSE-LABEL: @shuffle_v2i64_02_copy -; SSE: punpcklqdq {{.*}} # xmm1 = xmm1[0],xmm2[0] +; SSE-LABEL: shuffle_v2i64_02_copy: +; SSE: # BB#0: +; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v2i64_02_copy -; AVX: punpcklqdq {{.*}} # xmm0 = xmm1[0],xmm2[0] +; AVX-LABEL: shuffle_v2i64_02_copy: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0] ; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) { -; SSE2-LABEL: @shuffle_v2i64_03 -; SSE2: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; SSE2-LABEL: shuffle_v2i64_03: +; SSE2: # BB#0: +; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v2i64_03 -; SSE3: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; SSE3-LABEL: shuffle_v2i64_03: +; SSE3: # BB#0: +; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v2i64_03 -; SSSE3: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1] +; SSSE3-LABEL: shuffle_v2i64_03: +; SSSE3: # BB#0: +; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v2i64_03 -; SSE41: pblendw {{.*}} # xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; SSE41-LABEL: shuffle_v2i64_03: +; SSE41: # BB#0: +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_03: +; AVX: # BB#0: +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { -; SSE2-LABEL: @shuffle_v2i64_03_copy -; SSE2: shufpd {{.*}} # xmm1 = xmm1[0],xmm2[1] +; SSE2-LABEL: shuffle_v2i64_03_copy: +; SSE2: # BB#0: +; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1] ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v2i64_03_copy -; SSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm2[1] +; SSE3-LABEL: shuffle_v2i64_03_copy: +; SSE3: # BB#0: +; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1] ; SSE3-NEXT: movapd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v2i64_03_copy -; SSSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm2[1] +; SSSE3-LABEL: shuffle_v2i64_03_copy: +; SSSE3: # BB#0: +; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1] ; SSSE3-NEXT: movapd %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v2i64_03_copy -; SSE41: pblendw {{.*}} # xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] +; SSE41-LABEL: shuffle_v2i64_03_copy: +; SSE41: # BB#0: +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_03_copy: +; AVX: # BB#0: +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) { -; SSE2-LABEL: @shuffle_v2i64_12 -; SSE2: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0] +; SSE2-LABEL: shuffle_v2i64_12: +; SSE2: # BB#0: +; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v2i64_12 -; SSE3: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0] +; SSE3-LABEL: shuffle_v2i64_12: +; SSE3: # BB#0: +; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v2i64_12 -; SSSE3: palignr {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; SSSE3-LABEL: shuffle_v2i64_12: +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v2i64_12 -; SSE41: palignr {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; SSE41-LABEL: shuffle_v2i64_12: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_12: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { -; SSE2-LABEL: @shuffle_v2i64_12_copy -; SSE2: shufpd {{.*}} # xmm1 = xmm1[1],xmm2[0] +; SSE2-LABEL: shuffle_v2i64_12_copy: +; SSE2: # BB#0: +; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0] ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v2i64_12_copy -; SSE3: shufpd {{.*}} # xmm1 = xmm1[1],xmm2[0] +; SSE3-LABEL: shuffle_v2i64_12_copy: +; SSE3: # BB#0: +; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0] ; SSE3-NEXT: movapd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v2i64_12_copy -; SSSE3: palignr {{.*}} # xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] +; SSSE3-LABEL: shuffle_v2i64_12_copy: +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: movdqa %xmm2, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v2i64_12_copy -; SSE41: palignr {{.*}} # xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] +; SSE41-LABEL: shuffle_v2i64_12_copy: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_12_copy: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) { -; ALL-LABEL: @shuffle_v2i64_13 -; ALL: punpckhqdq {{.*}} # xmm0 = xmm0[1],xmm1[1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v2i64_13: +; SSE: # BB#0: +; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_13: +; AVX: # BB#0: +; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { -; SSE-LABEL: @shuffle_v2i64_13_copy -; SSE: punpckhqdq {{.*}} # xmm1 = xmm1[1],xmm2[1] +; SSE-LABEL: shuffle_v2i64_13_copy: +; SSE: # BB#0: +; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v2i64_13_copy -; AVX: punpckhqdq {{.*}} # xmm0 = xmm1[1],xmm2[1] +; AVX-LABEL: shuffle_v2i64_13_copy: +; AVX: # BB#0: +; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1] ; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) { -; SSE-LABEL: @shuffle_v2i64_20 -; SSE: punpcklqdq {{.*}} # xmm1 = xmm1[0],xmm0[0] +; SSE-LABEL: shuffle_v2i64_20: +; SSE: # BB#0: +; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v2i64_20 -; AVX: vpunpcklqdq {{.*}} # xmm0 = xmm1[0],xmm0[0] +; AVX-LABEL: shuffle_v2i64_20: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { -; SSE-LABEL: @shuffle_v2i64_20_copy -; SSE: punpcklqdq {{.*}} # xmm2 = xmm2[0],xmm1[0] +; SSE-LABEL: shuffle_v2i64_20_copy: +; SSE: # BB#0: +; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; SSE-NEXT: movdqa %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v2i64_20_copy -; AVX: vpunpcklqdq {{.*}} # xmm0 = xmm2[0],xmm1[0] +; AVX-LABEL: shuffle_v2i64_20_copy: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0] ; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) { -; SSE2-LABEL: @shuffle_v2i64_21 -; SSE2: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1] +; SSE2-LABEL: shuffle_v2i64_21: +; SSE2: # BB#0: +; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1] ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v2i64_21 -; SSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1] +; SSE3-LABEL: shuffle_v2i64_21: +; SSE3: # BB#0: +; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1] ; SSE3-NEXT: movapd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v2i64_21 -; SSSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1] +; SSSE3-LABEL: shuffle_v2i64_21: +; SSSE3: # BB#0: +; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1] ; SSSE3-NEXT: movapd %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v2i64_21 -; SSE41: pblendw {{.*}} # xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; SSE41-LABEL: shuffle_v2i64_21: +; SSE41: # BB#0: +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_21: +; AVX: # BB#0: +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { -; SSE2-LABEL: @shuffle_v2i64_21_copy -; SSE2: shufpd {{.*}} # xmm2 = xmm2[0],xmm1[1] +; SSE2-LABEL: shuffle_v2i64_21_copy: +; SSE2: # BB#0: +; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1] ; SSE2-NEXT: movapd %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v2i64_21_copy -; SSE3: shufpd {{.*}} # xmm2 = xmm2[0],xmm1[1] +; SSE3-LABEL: shuffle_v2i64_21_copy: +; SSE3: # BB#0: +; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1] ; SSE3-NEXT: movapd %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v2i64_21_copy -; SSSE3: shufpd {{.*}} # xmm2 = xmm2[0],xmm1[1] +; SSSE3-LABEL: shuffle_v2i64_21_copy: +; SSSE3: # BB#0: +; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1] ; SSSE3-NEXT: movapd %xmm2, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v2i64_21_copy -; SSE41: pblendw {{.*}} # xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7] +; SSE41-LABEL: shuffle_v2i64_21_copy: +; SSE41: # BB#0: +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7] ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_21_copy: +; AVX: # BB#0: +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) { -; SSE2-LABEL: @shuffle_v2i64_30 -; SSE2: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0] +; SSE2-LABEL: shuffle_v2i64_30: +; SSE2: # BB#0: +; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0] ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v2i64_30 -; SSE3: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0] +; SSE3-LABEL: shuffle_v2i64_30: +; SSE3: # BB#0: +; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0] ; SSE3-NEXT: movapd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v2i64_30 -; SSSE3: palignr {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] +; SSSE3-LABEL: shuffle_v2i64_30: +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v2i64_30: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] +; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_30: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { -; SSE2-LABEL: @shuffle_v2i64_30_copy -; SSE2: shufpd {{.*}} # xmm2 = xmm2[1],xmm1[0] +; SSE2-LABEL: shuffle_v2i64_30_copy: +; SSE2: # BB#0: +; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0] ; SSE2-NEXT: movapd %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v2i64_30_copy -; SSE3: shufpd {{.*}} # xmm2 = xmm2[1],xmm1[0] +; SSE3-LABEL: shuffle_v2i64_30_copy: +; SSE3: # BB#0: +; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0] ; SSE3-NEXT: movapd %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v2i64_30_copy -; SSSE3: palignr {{.*}} # xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; SSSE3-LABEL: shuffle_v2i64_30_copy: +; SSSE3: # BB#0: +; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v2i64_30_copy -; SSE41: palignr {{.*}} # xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; SSE41-LABEL: shuffle_v2i64_30_copy: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v2i64_30_copy: +; AVX: # BB#0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) { -; SSE-LABEL: @shuffle_v2i64_31 -; SSE: punpckhqdq {{.*}} # xmm1 = xmm1[1],xmm0[1] +; SSE-LABEL: shuffle_v2i64_31: +; SSE: # BB#0: +; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v2i64_31 -; AVX: vpunpckhqdq {{.*}} # xmm0 = xmm1[1],xmm0[1] +; AVX-LABEL: shuffle_v2i64_31: +; AVX: # BB#0: +; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1] ; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { -; SSE-LABEL: @shuffle_v2i64_31_copy -; SSE: punpckhqdq {{.*}} # xmm2 = xmm2[1],xmm1[1] +; SSE-LABEL: shuffle_v2i64_31_copy: +; SSE: # BB#0: +; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE-NEXT: movdqa %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v2i64_31_copy -; AVX: vpunpckhqdq {{.*}} # xmm0 = xmm2[1],xmm1[1] +; AVX-LABEL: shuffle_v2i64_31_copy: +; AVX: # BB#0: +; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1] ; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle @@ -438,12 +622,14 @@ define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) { -; SSE-LABEL: @insert_reg_and_zero_v2i64 -; SSE: movd %rdi, %xmm0 +; SSE-LABEL: insert_reg_and_zero_v2i64: +; SSE: # BB#0: +; SSE-NEXT: movd %rdi, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: @insert_reg_and_zero_v2i64 -; AVX: vmovq %rdi, %xmm0 +; AVX-LABEL: insert_reg_and_zero_v2i64: +; AVX: # BB#0: +; AVX-NEXT: vmovq %rdi, %xmm0 ; AVX-NEXT: retq %v = insertelement <2 x i64> undef, i64 %a, i32 0 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> @@ -451,9 +637,15 @@ define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) { } define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) { -; ALL-LABEL: @insert_mem_and_zero_v2i64 -; ALL: movq (%rdi), %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: insert_mem_and_zero_v2i64: +; SSE: # BB#0: +; SSE-NEXT: movq (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: insert_mem_and_zero_v2i64: +; AVX: # BB#0: +; AVX-NEXT: vmovq (%rdi), %xmm0 +; AVX-NEXT: retq %a = load i64* %ptr %v = insertelement <2 x i64> undef, i64 %a, i32 0 %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> @@ -461,18 +653,30 @@ define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) { } define <2 x double> @insert_reg_and_zero_v2f64(double %a) { -; ALL-LABEL: @insert_reg_and_zero_v2f64 -; ALL: movq %xmm0, %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: insert_reg_and_zero_v2f64: +; SSE: # BB#0: +; SSE-NEXT: movq %xmm0, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: insert_reg_and_zero_v2f64: +; AVX: # BB#0: +; AVX-NEXT: vmovq %xmm0, %xmm0 +; AVX-NEXT: retq %v = insertelement <2 x double> undef, double %a, i32 0 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> ret <2 x double> %shuffle } define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) { -; ALL-LABEL: @insert_mem_and_zero_v2f64 -; ALL: movsd (%rdi), %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: insert_mem_and_zero_v2f64: +; SSE: # BB#0: +; SSE-NEXT: movsd (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: insert_mem_and_zero_v2f64: +; AVX: # BB#0: +; AVX-NEXT: vmovsd (%rdi), %xmm0 +; AVX-NEXT: retq %a = load double* %ptr %v = insertelement <2 x double> undef, double %a, i32 0 %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> @@ -480,45 +684,62 @@ define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) { } define <2 x double> @insert_dup_reg_v2f64(double %a) { -; SSE2-LABEL: @insert_dup_reg_v2f64 -; SSE2: movlhps {{.*}} # xmm0 = xmm0[0,0] +; FIXME: We should match movddup for SSE3 and higher here. +; +; SSE2-LABEL: insert_dup_reg_v2f64: +; SSE2: # BB#0: +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq ; -; FIXME: This should match movddup as well! -; SSE3-LABEL: @insert_dup_reg_v2f64 -; SSE3: unpcklpd {{.*}} # xmm0 = xmm0[0,0] +; SSE3-LABEL: insert_dup_reg_v2f64: +; SSE3: # BB#0: +; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] ; SSE3-NEXT: retq ; -; FIXME: This should match movddup as well! -; SSSE3-LABEL: @insert_dup_reg_v2f64 -; SSSE3: unpcklpd {{.*}} # xmm0 = xmm0[0,0] +; SSSE3-LABEL: insert_dup_reg_v2f64: +; SSSE3: # BB#0: +; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] ; SSSE3-NEXT: retq ; -; FIXME: This should match movddup as well! -; SSE41-LABEL: @insert_dup_reg_v2f64 -; SSE41: unpcklpd {{.*}} # xmm0 = xmm0[0,0] +; SSE41-LABEL: insert_dup_reg_v2f64: +; SSE41: # BB#0: +; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] ; SSE41-NEXT: retq +; +; AVX-LABEL: insert_dup_reg_v2f64: +; AVX: # BB#0: +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] +; AVX-NEXT: retq %v = insertelement <2 x double> undef, double %a, i32 0 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> ret <2 x double> %shuffle } define <2 x double> @insert_dup_mem_v2f64(double* %ptr) { -; SSE2-LABEL: @insert_dup_mem_v2f64 -; SSE2: movsd {{.*}}, %xmm0 -; SSE2-NEXT: movlhps {{.*}} # xmm0 = xmm0[0,0] +; SSE2-LABEL: insert_dup_mem_v2f64: +; SSE2: # BB#0: +; SSE2-NEXT: movsd (%rdi), %xmm0 +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @insert_dup_mem_v2f64 -; SSE3: movddup {{.*}}, %xmm0 +; SSE3-LABEL: insert_dup_mem_v2f64: +; SSE3: # BB#0: +; SSE3-NEXT: movddup (%rdi), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @insert_dup_mem_v2f64 -; SSSE3: movddup {{.*}}, %xmm0 +; SSSE3-LABEL: insert_dup_mem_v2f64: +; SSSE3: # BB#0: +; SSSE3-NEXT: movddup (%rdi), %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @insert_dup_mem_v2f64 -; SSE41: movddup {{.*}}, %xmm0 +; SSE41-LABEL: insert_dup_mem_v2f64: +; SSE41: # BB#0: +; SSE41-NEXT: movddup (%rdi), %xmm0 ; SSE41-NEXT: retq +; +; AVX-LABEL: insert_dup_mem_v2f64: +; AVX: # BB#0: +; AVX-NEXT: vmovddup (%rdi), %xmm0 +; AVX-NEXT: retq %a = load double* %ptr %v = insertelement <2 x double> undef, double %a, i32 0 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> @@ -526,15 +747,15 @@ define <2 x double> @insert_dup_mem_v2f64(double* %ptr) { } define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) { -; SSE-LABEL: @shuffle_mem_v2f64_10 +; SSE-LABEL: shuffle_mem_v2f64_10: ; SSE: # BB#0: ; SSE-NEXT: movapd (%rdi), %xmm0 -; SSE-NEXT: shufpd {{.*}} # xmm0 = xmm0[1,0] +; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_mem_v2f64_10 +; AVX-LABEL: shuffle_mem_v2f64_10: ; AVX: # BB#0: -; AVX-NEXT: vpermilpd {{.*}} # xmm0 = mem[1,0] +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0] ; AVX-NEXT: retq %a = load <2 x double>* %ptr %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll index 1d0a90f9a94..c4c97f6b73e 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -8,789 +8,972 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" define <4 x i32> @shuffle_v4i32_0001(<4 x i32> %a, <4 x i32> %b) { -; ALL-LABEL: @shuffle_v4i32_0001 -; ALL: pshufd {{.*}} # xmm0 = xmm0[0,0,0,1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_0001: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_0001: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_0020(<4 x i32> %a, <4 x i32> %b) { -; ALL-LABEL: @shuffle_v4i32_0020 -; ALL: pshufd {{.*}} # xmm0 = xmm0[0,0,2,0] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_0020: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,0] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_0020: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,0] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) { -; ALL-LABEL: @shuffle_v4i32_0112 -; ALL: pshufd {{.*}} # xmm0 = xmm0[0,1,1,2] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_0112: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_0112: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) { -; ALL-LABEL: @shuffle_v4i32_0300 -; ALL: pshufd {{.*}} # xmm0 = xmm0[0,3,0,0] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_0300: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,0,0] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_0300: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,0] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_1000(<4 x i32> %a, <4 x i32> %b) { -; ALL-LABEL: @shuffle_v4i32_1000 -; ALL: pshufd {{.*}} # xmm0 = xmm0[1,0,0,0] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_1000: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_1000: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_2200(<4 x i32> %a, <4 x i32> %b) { -; ALL-LABEL: @shuffle_v4i32_2200 -; ALL: pshufd {{.*}} # xmm0 = xmm0[2,2,0,0] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_2200: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,0,0] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_2200: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,0,0] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_3330(<4 x i32> %a, <4 x i32> %b) { -; ALL-LABEL: @shuffle_v4i32_3330 -; ALL: pshufd {{.*}} # xmm0 = xmm0[3,3,3,0] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_3330: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,0] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_3330: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,0] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) { -; ALL-LABEL: @shuffle_v4i32_3210 -; ALL: pshufd {{.*}} # xmm0 = xmm0[3,2,1,0] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_3210: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_3210: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) { -; ALL-LABEL: @shuffle_v4i32_2121 -; ALL: pshufd {{.*}} # xmm0 = xmm0[2,1,2,1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_2121: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_2121: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,1] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: @shuffle_v4f32_0001 +; SSE-LABEL: shuffle_v4f32_0001: ; SSE: # BB#0: -; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0,0,1] +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,1] ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_0001 +; AVX-LABEL: shuffle_v4f32_0001: ; AVX: # BB#0: -; AVX-NEXT: vpermilps {{.*}} # xmm0 = xmm0[0,0,0,1] +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,1] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: @shuffle_v4f32_0020 +; SSE-LABEL: shuffle_v4f32_0020: ; SSE: # BB#0: -; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0,2,0] +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,0] ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_0020 +; AVX-LABEL: shuffle_v4f32_0020: ; AVX: # BB#0: -; AVX-NEXT: vpermilps {{.*}} # xmm0 = xmm0[0,0,2,0] +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,0] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: @shuffle_v4f32_0300 +; SSE-LABEL: shuffle_v4f32_0300: ; SSE: # BB#0: -; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[0,3,0,0] +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,0,0] ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_0300 +; AVX-LABEL: shuffle_v4f32_0300: ; AVX: # BB#0: -; AVX-NEXT: vpermilps {{.*}} # xmm0 = xmm0[0,3,0,0] +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,0,0] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: @shuffle_v4f32_1000 +; SSE-LABEL: shuffle_v4f32_1000: ; SSE: # BB#0: -; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[1,0,0,0] +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0,0,0] ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_1000 +; AVX-LABEL: shuffle_v4f32_1000: ; AVX: # BB#0: -; AVX-NEXT: vpermilps {{.*}} # xmm0 = xmm0[1,0,0,0] +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: @shuffle_v4f32_2200 +; SSE-LABEL: shuffle_v4f32_2200: ; SSE: # BB#0: -; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[2,2,0,0] +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2,0,0] ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_2200 +; AVX-LABEL: shuffle_v4f32_2200: ; AVX: # BB#0: -; AVX-NEXT: vpermilps {{.*}} # xmm0 = xmm0[2,2,0,0] +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,0,0] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: @shuffle_v4f32_3330 +; SSE-LABEL: shuffle_v4f32_3330: ; SSE: # BB#0: -; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[3,3,3,0] +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,0] ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_3330 +; AVX-LABEL: shuffle_v4f32_3330: ; AVX: # BB#0: -; AVX-NEXT: vpermilps {{.*}} # xmm0 = xmm0[3,3,3,0] +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,0] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: @shuffle_v4f32_3210 +; SSE-LABEL: shuffle_v4f32_3210: ; SSE: # BB#0: -; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[3,2,1,0] +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0] ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_3210 +; AVX-LABEL: shuffle_v4f32_3210: ; AVX: # BB#0: -; AVX-NEXT: vpermilps {{.*}} # xmm0 = xmm0[3,2,1,0] +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) { -; ALL-LABEL: @shuffle_v4f32_0011 -; ALL: unpcklps {{.*}} # xmm0 = xmm0[0,0,1,1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4f32_0011: +; SSE: # BB#0: +; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4f32_0011: +; AVX: # BB#0: +; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1] +; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) { -; ALL-LABEL: @shuffle_v4f32_2233 -; ALL: unpckhps {{.*}} # xmm0 = xmm0[2,2,3,3] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4f32_2233: +; SSE: # BB#0: +; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4f32_2233: +; AVX: # BB#0: +; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) { -; SSE2-LABEL: @shuffle_v4f32_0022 -; SSE2: shufps {{.*}} # xmm0 = xmm0[0,0,2,2] +; SSE2-LABEL: shuffle_v4f32_0022: +; SSE2: # BB#0: +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,2] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4f32_0022 -; SSE3: movsldup {{.*}} # xmm0 = xmm0[0,0,2,2] +; SSE3-LABEL: shuffle_v4f32_0022: +; SSE3: # BB#0: +; SSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4f32_0022 -; SSSE3: movsldup {{.*}} # xmm0 = xmm0[0,0,2,2] +; SSSE3-LABEL: shuffle_v4f32_0022: +; SSSE3: # BB#0: +; SSSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4f32_0022 -; SSE41: movsldup {{.*}} # xmm0 = xmm0[0,0,2,2] +; SSE41-LABEL: shuffle_v4f32_0022: +; SSE41: # BB#0: +; SSE41-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_0022 -; AVX: vmovsldup {{.*}} # xmm0 = xmm0[0,0,2,2] +; AVX-LABEL: shuffle_v4f32_0022: +; AVX: # BB#0: +; AVX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) { -; SSE2-LABEL: @shuffle_v4f32_1133 -; SSE2: shufps {{.*}} # xmm0 = xmm0[1,1,3,3] +; SSE2-LABEL: shuffle_v4f32_1133: +; SSE2: # BB#0: +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4f32_1133 -; SSE3: movshdup {{.*}} # xmm0 = xmm0[1,1,3,3] +; SSE3-LABEL: shuffle_v4f32_1133: +; SSE3: # BB#0: +; SSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4f32_1133 -; SSSE3: movshdup {{.*}} # xmm0 = xmm0[1,1,3,3] +; SSSE3-LABEL: shuffle_v4f32_1133: +; SSSE3: # BB#0: +; SSSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4f32_1133 -; SSE41: movshdup {{.*}} # xmm0 = xmm0[1,1,3,3] +; SSE41-LABEL: shuffle_v4f32_1133: +; SSE41: # BB#0: +; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_1133 -; AVX: vmovshdup {{.*}} # xmm0 = xmm0[1,1,3,3] +; AVX-LABEL: shuffle_v4f32_1133: +; AVX: # BB#0: +; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %shuffle } define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) { -; SSE2-LABEL: @shuffle_v4i32_0124 -; SSE2: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0] -; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[2,0] +; SSE2-LABEL: shuffle_v4i32_0124: +; SSE2: # BB#0: +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4i32_0124 -; SSE3: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0] -; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[2,0] +; SSE3-LABEL: shuffle_v4i32_0124: +; SSE3: # BB#0: +; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4i32_0124 -; SSSE3: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0] -; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[2,0] +; SSSE3-LABEL: shuffle_v4i32_0124: +; SSSE3: # BB#0: +; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4i32_0124 -; SSE41: insertps {{.*}} # xmm0 = xmm0[0,1,2],xmm1[0] +; SSE41-LABEL: shuffle_v4i32_0124: +; SSE41: # BB#0: +; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4i32_0124 -; AVX: vinsertps {{.*}} # xmm0 = xmm0[0,1,2],xmm1[0] +; AVX-LABEL: shuffle_v4i32_0124: +; AVX: # BB#0: +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) { -; ALL-LABEL: @shuffle_v4i32_0142 -; ALL: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0] -; ALL-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,2] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_0142: +; SSE: # BB#0: +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_0142: +; AVX: # BB#0: +; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] +; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_0412(<4 x i32> %a, <4 x i32> %b) { -; SSE-LABEL: @shuffle_v4i32_0412 -; SSE: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] -; SSE-NEXT: shufps {{.*}} # xmm1 = xmm1[2,0],xmm0[1,2] +; SSE-LABEL: shuffle_v4i32_0412: +; SSE: # BB#0: +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0] +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v4i32_0412 -; AVX: vshufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] -; AVX-NEXT: vshufps {{.*}} # xmm0 = xmm1[2,0],xmm0[1,2] +; AVX-LABEL: shuffle_v4i32_0412: +; AVX: # BB#0: +; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0] +; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[2,0],xmm0[1,2] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) { -; SSE-LABEL: @shuffle_v4i32_4012 -; SSE: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] -; SSE-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2] +; SSE-LABEL: shuffle_v4i32_4012: +; SSE: # BB#0: +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0] +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v4i32_4012 -; AVX: vshufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] -; AVX-NEXT: vshufps {{.*}} # xmm0 = xmm1[0,2],xmm0[1,2] +; AVX-LABEL: shuffle_v4i32_4012: +; AVX: # BB#0: +; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0] +; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[1,2] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) { -; ALL-LABEL: @shuffle_v4i32_0145 -; ALL: punpcklqdq {{.*}} # xmm0 = xmm0[0],xmm1[0] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_0145: +; SSE: # BB#0: +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_0145: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) { -; ALL-LABEL: @shuffle_v4i32_0451 -; ALL: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,1] -; ALL-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,3,1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_0451: +; SSE: # BB#0: +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1] +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_0451: +; AVX: # BB#0: +; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1] +; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,3,1] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) { -; SSE-LABEL: @shuffle_v4i32_4501 -; SSE: punpcklqdq {{.*}} # xmm1 = xmm1[0],xmm0[0] +; SSE-LABEL: shuffle_v4i32_4501: +; SSE: # BB#0: +; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_v4i32_4501 -; AVX: punpcklqdq {{.*}} # xmm0 = xmm1[0],xmm0[0] +; AVX-LABEL: shuffle_v4i32_4501: +; AVX: # BB#0: +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_4015(<4 x i32> %a, <4 x i32> %b) { -; ALL-LABEL: @shuffle_v4i32_4015 -; ALL: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,1] -; ALL-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0,1,3] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_4015: +; SSE: # BB#0: +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1] +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0,1,3] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_4015: +; AVX: # BB#0: +; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1] +; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,0,1,3] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) { -; SSE2-LABEL: @shuffle_v4f32_4zzz -; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][1,0] -; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],[[X]][2,3] +; SSE2-LABEL: shuffle_v4f32_4zzz: +; SSE2: # BB#0: +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4f32_4zzz -; SSE3: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][1,0] -; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],[[X]][2,3] +; SSE3-LABEL: shuffle_v4f32_4zzz: +; SSE3: # BB#0: +; SSE3-NEXT: xorps %xmm1, %xmm1 +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0] +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3] ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4f32_4zzz -; SSSE3: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][1,0] -; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],[[X]][2,3] +; SSSE3-LABEL: shuffle_v4f32_4zzz: +; SSSE3: # BB#0: +; SSSE3-NEXT: xorps %xmm1, %xmm1 +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4f32_4zzz -; SSE41: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE41-NEXT: blendps {{.*}} # [[X]] = xmm0[0],[[X]][1,2,3] -; SSE41-NEXT: movaps %[[X]], %xmm0 +; SSE41-LABEL: shuffle_v4f32_4zzz: +; SSE41: # BB#0: +; SSE41-NEXT: xorps %xmm1, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] +; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_4zzz -; AVX: vxorps %[[X:xmm[0-9]+]], %[[X]] -; AVX-NEXT: vblendps {{.*}} # xmm0 = xmm0[0],[[X]][1,2,3] +; AVX-LABEL: shuffle_v4f32_4zzz: +; AVX: # BB#0: +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) { -; SSE2-LABEL: @shuffle_v4f32_z4zz -; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][2,0] -; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][3,0] +; SSE2-LABEL: shuffle_v4f32_z4zz: +; SSE2: # BB#0: +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4f32_z4zz -; SSE3: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][2,0] -; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][3,0] +; SSE3-LABEL: shuffle_v4f32_z4zz: +; SSE3: # BB#0: +; SSE3-NEXT: xorps %xmm1, %xmm1 +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0] +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0] ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4f32_z4zz -; SSSE3: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][2,0] -; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][3,0] +; SSSE3-LABEL: shuffle_v4f32_z4zz: +; SSSE3: # BB#0: +; SSSE3-NEXT: xorps %xmm1, %xmm1 +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4f32_z4zz -; SSE41: insertps {{.*}} # xmm0 = zero,xmm0[0],zero,zero +; SSE41-LABEL: shuffle_v4f32_z4zz: +; SSE41: # BB#0: +; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_z4zz -; AVX: vinsertps {{.*}} # xmm0 = zero,xmm0[0],zero,zero +; AVX-LABEL: shuffle_v4f32_z4zz: +; AVX: # BB#0: +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) { -; SSE2-LABEL: @shuffle_v4f32_zz4z -; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][0,0] -; SSE2-NEXT: shufps {{.*}} # [[X]] = [[X]][0,0],xmm0[0,2] -; SSE2-NEXT: movaps %[[X]], %xmm0 +; SSE2-LABEL: shuffle_v4f32_zz4z: +; SSE2: # BB#0: +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2] +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4f32_zz4z -; SSE3: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][0,0] -; SSE3-NEXT: shufps {{.*}} # [[X]] = [[X]][0,0],xmm0[0,2] -; SSE3-NEXT: movaps %[[X]], %xmm0 +; SSE3-LABEL: shuffle_v4f32_zz4z: +; SSE3: # BB#0: +; SSE3-NEXT: xorps %xmm1, %xmm1 +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] +; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2] +; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4f32_zz4z -; SSSE3: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][0,0] -; SSSE3-NEXT: shufps {{.*}} # [[X]] = [[X]][0,0],xmm0[0,2] -; SSSE3-NEXT: movaps %[[X]], %xmm0 +; SSSE3-LABEL: shuffle_v4f32_zz4z: +; SSSE3: # BB#0: +; SSSE3-NEXT: xorps %xmm1, %xmm1 +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] +; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2] +; SSSE3-NEXT: movaps %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4f32_zz4z -; SSE41: insertps {{.*}} # xmm0 = zero,zero,xmm0[0],zero +; SSE41-LABEL: shuffle_v4f32_zz4z: +; SSE41: # BB#0: +; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_zz4z -; AVX: vinsertps {{.*}} # xmm0 = zero,zero,xmm0[0],zero +; AVX-LABEL: shuffle_v4f32_zz4z: +; AVX: # BB#0: +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) { -; SSE2-LABEL: @shuffle_v4f32_zuu4 -; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE2-NEXT: shufps {{.*}} # [[X]] = [[X]][0,1],xmm0[2,0] -; SSE2-NEXT: movaps %[[X]], %xmm0 +; SSE2-LABEL: shuffle_v4f32_zuu4: +; SSE2: # BB#0: +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4f32_zuu4 -; SSE3: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE3-NEXT: shufps {{.*}} # [[X]] = [[X]][0,1],xmm0[2,0] -; SSE3-NEXT: movaps %[[X]], %xmm0 +; SSE3-LABEL: shuffle_v4f32_zuu4: +; SSE3: # BB#0: +; SSE3-NEXT: xorps %xmm1, %xmm1 +; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] +; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4f32_zuu4 -; SSSE3: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSSE3-NEXT: shufps {{.*}} # [[X]] = [[X]][0,1],xmm0[2,0] -; SSSE3-NEXT: movaps %[[X]], %xmm0 +; SSSE3-LABEL: shuffle_v4f32_zuu4: +; SSSE3: # BB#0: +; SSSE3-NEXT: xorps %xmm1, %xmm1 +; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] +; SSSE3-NEXT: movaps %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4f32_zuu4 -; SSE41: insertps {{.*}} # xmm0 = zero,zero,zero,xmm0[0] +; SSE41-LABEL: shuffle_v4f32_zuu4: +; SSE41: # BB#0: +; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0] ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_zuu4 -; AVX: vinsertps {{.*}} # xmm0 = zero,zero,zero,xmm0[0] +; AVX-LABEL: shuffle_v4f32_zuu4: +; AVX: # BB#0: +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) { -; SSE2-LABEL: @shuffle_v4f32_zzz7 -; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],[[X]][2,0] -; SSE2-NEXT: shufps {{.*}} # [[X]] = [[X]][0,1],xmm0[2,0] -; SSE2-NEXT: movaps %[[X]], %xmm0 +; SSE2-LABEL: shuffle_v4f32_zzz7: +; SSE2: # BB#0: +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4f32_zzz7 -; SSE3: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],[[X]][2,0] -; SSE3-NEXT: shufps {{.*}} # [[X]] = [[X]][0,1],xmm0[2,0] -; SSE3-NEXT: movaps %[[X]], %xmm0 +; SSE3-LABEL: shuffle_v4f32_zzz7: +; SSE3: # BB#0: +; SSE3-NEXT: xorps %xmm1, %xmm1 +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] +; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] +; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4f32_zzz7 -; SSSE3: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],[[X]][2,0] -; SSSE3-NEXT: shufps {{.*}} # [[X]] = [[X]][0,1],xmm0[2,0] -; SSSE3-NEXT: movaps %[[X]], %xmm0 +; SSSE3-LABEL: shuffle_v4f32_zzz7: +; SSSE3: # BB#0: +; SSSE3-NEXT: xorps %xmm1, %xmm1 +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] +; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] +; SSSE3-NEXT: movaps %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4f32_zzz7 -; SSE41: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE41-NEXT: blendps {{.*}} # [[X]] = [[X]][0,1,2],xmm0[3] -; SSE41-NEXT: movaps %[[X]], %xmm0 +; SSE41-LABEL: shuffle_v4f32_zzz7: +; SSE41: # BB#0: +; SSE41-NEXT: xorps %xmm1, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] +; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_zzz7 -; AVX: vxorps %[[X:xmm[0-9]+]], %[[X]] -; AVX-NEXT: vblendps {{.*}} # xmm0 = [[X]][0,1,2],xmm0[3] +; AVX-LABEL: shuffle_v4f32_zzz7: +; AVX: # BB#0: +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> ret <4 x float> %shuffle } define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) { -; SSE2-LABEL: @shuffle_v4f32_z6zz -; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][0,0] -; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][2,3] +; SSE2-LABEL: shuffle_v4f32_z6zz: +; SSE2: # BB#0: +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4f32_z6zz -; SSE3: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][0,0] -; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][2,3] +; SSE3-LABEL: shuffle_v4f32_z6zz: +; SSE3: # BB#0: +; SSE3-NEXT: xorps %xmm1, %xmm1 +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0] +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4f32_z6zz -; SSSE3: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][0,0] -; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][2,3] +; SSSE3-LABEL: shuffle_v4f32_z6zz: +; SSSE3: # BB#0: +; SSSE3-NEXT: xorps %xmm1, %xmm1 +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4f32_z6zz -; SSE41: insertps {{.*}} # xmm0 = zero,xmm0[2],zero,zero +; SSE41-LABEL: shuffle_v4f32_z6zz: +; SSE41: # BB#0: +; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4f32_z6zz -; AVX: vinsertps {{.*}} # xmm0 = zero,xmm0[2],zero,zero +; AVX-LABEL: shuffle_v4f32_z6zz: +; AVX: # BB#0: +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> ret <4 x float> %shuffle } define <4 x i32> @shuffle_v4i32_4zzz(i32 %i) { -; ALL-LABEL: @shuffle_v4i32_4zzz -; ALL: movd {{.*}}, %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_4zzz: +; SSE: # BB#0: +; SSE-NEXT: movd %edi, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_4zzz: +; AVX: # BB#0: +; AVX-NEXT: vmovd %edi, %xmm0 +; AVX-NEXT: retq %a = insertelement <4 x i32> undef, i32 %i, i32 0 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_z4zz(i32 %i) { -; ALL-LABEL: @shuffle_v4i32_z4zz -; ALL: movd {{.*}}, %xmm0 -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,0,1,1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_z4zz: +; SSE: # BB#0: +; SSE-NEXT: movd %edi, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_z4zz: +; AVX: # BB#0: +; AVX-NEXT: vmovd %edi, %xmm0 +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1] +; AVX-NEXT: retq %a = insertelement <4 x i32> undef, i32 %i, i32 0 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_zz4z(i32 %i) { -; ALL-LABEL: @shuffle_v4i32_zz4z -; ALL: movd {{.*}}, %xmm0 -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,1,0,1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_zz4z: +; SSE: # BB#0: +; SSE-NEXT: movd %edi, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,0,1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_zz4z: +; AVX: # BB#0: +; AVX-NEXT: vmovd %edi, %xmm0 +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1] +; AVX-NEXT: retq %a = insertelement <4 x i32> undef, i32 %i, i32 0 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_zuu4(i32 %i) { -; ALL-LABEL: @shuffle_v4i32_zuu4 -; ALL: movd {{.*}}, %xmm0 -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,1,1,0] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_zuu4: +; SSE: # BB#0: +; SSE-NEXT: movd %edi, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,0] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_zuu4: +; AVX: # BB#0: +; AVX-NEXT: vmovd %edi, %xmm0 +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,0] +; AVX-NEXT: retq %a = insertelement <4 x i32> undef, i32 %i, i32 0 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_z6zz(i32 %i) { -; ALL-LABEL: @shuffle_v4i32_z6zz -; ALL: movd {{.*}}, %xmm0 -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,0,1,1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_z6zz: +; SSE: # BB#0: +; SSE-NEXT: movd %edi, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_z6zz: +; AVX: # BB#0: +; AVX-NEXT: vmovd %edi, %xmm0 +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1] +; AVX-NEXT: retq %a = insertelement <4 x i32> undef, i32 %i, i32 2 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) { -; SSE2-LABEL: @shuffle_v4i32_7012 +; SSE2-LABEL: shuffle_v4i32_7012: ; SSE2: # BB#0: -; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[3,0],xmm0[0,0] -; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2] ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4i32_7012 +; SSE3-LABEL: shuffle_v4i32_7012: ; SSE3: # BB#0: -; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[3,0],xmm0[0,0] -; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2] +; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0] +; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2] ; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4i32_7012 +; SSSE3-LABEL: shuffle_v4i32_7012: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4i32_7012 +; SSE41-LABEL: shuffle_v4i32_7012: ; SSE41: # BB#0: -; SSE41-NEXT: palignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4i32_7012 +; AVX-LABEL: shuffle_v4i32_7012: ; AVX: # BB#0: -; AVX-NEXT: vpalignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) { -; SSE2-LABEL: @shuffle_v4i32_6701 +; SSE2-LABEL: shuffle_v4i32_6701: ; SSE2: # BB#0: -; SSE2-NEXT: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0] +; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0] ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4i32_6701 +; SSE3-LABEL: shuffle_v4i32_6701: ; SSE3: # BB#0: -; SSE3-NEXT: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0] +; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0] ; SSE3-NEXT: movapd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4i32_6701 +; SSSE3-LABEL: shuffle_v4i32_6701: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4i32_6701 +; SSE41-LABEL: shuffle_v4i32_6701: ; SSE41: # BB#0: -; SSE41-NEXT: palignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4i32_6701 +; AVX-LABEL: shuffle_v4i32_6701: ; AVX: # BB#0: -; AVX-NEXT: vpalignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_5670(<4 x i32> %a, <4 x i32> %b) { -; SSE2-LABEL: @shuffle_v4i32_5670 +; SSE2-LABEL: shuffle_v4i32_5670: ; SSE2: # BB#0: -; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],xmm1[3,0] -; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[1,2],xmm0[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0] ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4i32_5670 +; SSE3-LABEL: shuffle_v4i32_5670: ; SSE3: # BB#0: -; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],xmm1[3,0] -; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[1,2],xmm0[2,0] +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] +; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0] ; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4i32_5670 +; SSSE3-LABEL: shuffle_v4i32_5670: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4i32_5670 +; SSE41-LABEL: shuffle_v4i32_5670: ; SSE41: # BB#0: -; SSE41-NEXT: palignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4i32_5670 +; AVX-LABEL: shuffle_v4i32_5670: ; AVX: # BB#0: -; AVX-NEXT: vpalignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) { -; SSE2-LABEL: @shuffle_v4i32_1234 +; SSE2-LABEL: shuffle_v4i32_1234: ; SSE2: # BB#0: -; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[3,0] -; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[1,2],xmm1[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4i32_1234 +; SSE3-LABEL: shuffle_v4i32_1234: ; SSE3: # BB#0: -; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[3,0] -; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[1,2],xmm1[2,0] +; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0] ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4i32_1234 +; SSSE3-LABEL: shuffle_v4i32_1234: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $4, {{.*}} # xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] +; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4i32_1234 +; SSE41-LABEL: shuffle_v4i32_1234: ; SSE41: # BB#0: -; SSE41-NEXT: palignr $4, {{.*}} # xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] +; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4i32_1234 +; AVX-LABEL: shuffle_v4i32_1234: ; AVX: # BB#0: -; AVX-NEXT: vpalignr $4, {{.*}} # xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) { -; SSE2-LABEL: @shuffle_v4i32_2345 +; SSE2-LABEL: shuffle_v4i32_2345: ; SSE2: # BB#0: -; SSE2-NEXT: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0] +; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4i32_2345 +; SSE3-LABEL: shuffle_v4i32_2345: ; SSE3: # BB#0: -; SSE3-NEXT: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0] +; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4i32_2345 +; SSSE3-LABEL: shuffle_v4i32_2345: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $8, {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4i32_2345 +; SSE41-LABEL: shuffle_v4i32_2345: ; SSE41: # BB#0: -; SSE41-NEXT: palignr $8, {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4i32_2345 +; AVX-LABEL: shuffle_v4i32_2345: ; AVX: # BB#0: -; AVX-NEXT: vpalignr $8, {{.*}} # xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) { -; SSE2-LABEL: @shuffle_v4i32_3456 +; SSE2-LABEL: shuffle_v4i32_3456: ; SSE2: # BB#0: -; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],xmm1[0,0] -; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],xmm1[1,2] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4i32_3456 +; SSE3-LABEL: shuffle_v4i32_3456: ; SSE3: # BB#0: -; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],xmm1[0,0] -; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],xmm1[1,2] +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0] +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2] ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4i32_3456 +; SSSE3-LABEL: shuffle_v4i32_3456: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $12, {{.*}} # xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] +; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4i32_3456 +; SSE41-LABEL: shuffle_v4i32_3456: ; SSE41: # BB#0: -; SSE41-NEXT: palignr $12, {{.*}} # xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] +; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4i32_3456 +; AVX-LABEL: shuffle_v4i32_3456: ; AVX: # BB#0: -; AVX-NEXT: vpalignr $12, {{.*}} # xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) { -; ALL-LABEL: @shuffle_v4i32_0u1u -; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,0,1,1] -; ALL-NEXT: retq +; SSE-LABEL: shuffle_v4i32_0u1u: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_v4i32_0u1u: +; AVX: # BB#0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) { -; SSE2-LABEL: @shuffle_v4i32_0z1z +; SSE2-LABEL: shuffle_v4i32_0z1z: ; SSE2: # BB#0: -; SSE2-NEXT: pxor %[[X:xmm[0-9]+]], %[[X]] -; SSE2-NEXT: punpckldq {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1] +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; -; SSE3-LABEL: @shuffle_v4i32_0z1z +; SSE3-LABEL: shuffle_v4i32_0z1z: ; SSE3: # BB#0: -; SSE3-NEXT: pxor %[[X:xmm[0-9]+]], %[[X]] -; SSE3-NEXT: punpckldq {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1] +; SSE3-NEXT: pxor %xmm1, %xmm1 +; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE3-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v4i32_0z1z +; SSSE3-LABEL: shuffle_v4i32_0z1z: ; SSSE3: # BB#0: -; SSSE3-NEXT: pxor %[[X:xmm[0-9]+]], %[[X]] -; SSSE3-NEXT: punpckldq {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1] +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v4i32_0z1z +; SSE41-LABEL: shuffle_v4i32_0z1z: ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxdq %xmm0, %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: @shuffle_v4i32_0z1z +; AVX-LABEL: shuffle_v4i32_0z1z: ; AVX: # BB#0: ; AVX-NEXT: vpmovzxdq %xmm0, %xmm0 ; AVX-NEXT: retq @@ -799,20 +982,30 @@ define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) { } define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) { -; ALL-LABEL: @insert_reg_and_zero_v4i32 -; ALL: # BB#0: -; ALL-NEXT: movd %edi, %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: insert_reg_and_zero_v4i32: +; SSE: # BB#0: +; SSE-NEXT: movd %edi, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: insert_reg_and_zero_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vmovd %edi, %xmm0 +; AVX-NEXT: retq %v = insertelement <4 x i32> undef, i32 %a, i32 0 %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) { -; ALL-LABEL: @insert_mem_and_zero_v4i32 -; ALL: # BB#0: -; ALL-NEXT: movd (%rdi), %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: insert_mem_and_zero_v4i32: +; SSE: # BB#0: +; SSE-NEXT: movd (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: insert_mem_and_zero_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vmovd (%rdi), %xmm0 +; AVX-NEXT: retq %a = load i32* %ptr %v = insertelement <4 x i32> undef, i32 %a, i32 0 %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> @@ -820,17 +1013,17 @@ define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) { } define <4 x float> @insert_reg_and_zero_v4f32(float %a) { -; SSE-LABEL: @insert_reg_and_zero_v4f32 +; SSE-LABEL: insert_reg_and_zero_v4f32: ; SSE: # BB#0: -; SSE-NEXT: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE-NEXT: movss %xmm0, %[[X]] -; SSE-NEXT: movaps %[[X]], %xmm0 +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: movss %xmm0, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: @insert_reg_and_zero_v4f32 +; AVX-LABEL: insert_reg_and_zero_v4f32: ; AVX: # BB#0: -; AVX-NEXT: vxorps %[[X:xmm[0-9]+]], %[[X]], %[[X]] -; AVX-NEXT: vmovss %xmm0, %[[X]], %xmm0 +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vmovss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq %v = insertelement <4 x float> undef, float %a, i32 0 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> @@ -838,10 +1031,15 @@ define <4 x float> @insert_reg_and_zero_v4f32(float %a) { } define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) { -; ALL-LABEL: @insert_mem_and_zero_v4f32 -; ALL: # BB#0: -; ALL-NEXT: movss (%rdi), %xmm0 -; ALL-NEXT: retq +; SSE-LABEL: insert_mem_and_zero_v4f32: +; SSE: # BB#0: +; SSE-NEXT: movss (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: insert_mem_and_zero_v4f32: +; AVX: # BB#0: +; AVX-NEXT: vmovss (%rdi), %xmm0 +; AVX-NEXT: retq %a = load float* %ptr %v = insertelement <4 x float> undef, float %a, i32 0 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> @@ -849,15 +1047,15 @@ define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) { } define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) { -; SSE-LABEL: @shuffle_mem_v4f32_3210 +; SSE-LABEL: shuffle_mem_v4f32_3210: ; SSE: # BB#0: ; SSE-NEXT: movaps (%rdi), %xmm0 -; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[3,2,1,0] +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0] ; SSE-NEXT: retq ; -; AVX-LABEL: @shuffle_mem_v4f32_3210 +; AVX-LABEL: shuffle_mem_v4f32_3210: ; AVX: # BB#0: -; AVX-NEXT: vpermilps {{.*}} # xmm0 = mem[3,2,1,0] +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0] ; AVX-NEXT: retq %a = load <4 x float>* %ptr %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll index ddd3914d2cb..a3ca88c927b 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -1,593 +1,730 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSSE3 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE41 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_01012323 +; ALL-LABEL: shuffle_v8i16_01012323: ; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,0,1,1] +; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_67452301 +; ALL-LABEL: shuffle_v8i16_67452301: ; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,2,1,0] +; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_456789AB +; SSE2-LABEL: shuffle_v8i16_456789AB: ; SSE2: # BB#0: -; SSE2-NEXT: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0] +; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_456789AB +; SSSE3-LABEL: shuffle_v8i16_456789AB: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_456789AB: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_00000000 +; SSE2-LABEL: shuffle_v8i16_00000000: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_00000000 +; SSSE3-LABEL: shuffle_v8i16_00000000: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_00000000: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_00004444 +; ALL-LABEL: shuffle_v8i16_00004444: ; ALL: # BB#0: -; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7] -; ALL-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; ALL-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_u0u1u2u3 +; ALL-LABEL: shuffle_v8i16_u0u1u2u3: ; ALL: # BB#0: -; ALL-NEXT: unpcklwd {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3] +; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_u4u5u6u7 +; ALL-LABEL: shuffle_v8i16_u4u5u6u7: ; ALL: # BB#0: -; ALL-NEXT: unpckhwd {{.*}} # xmm0 = xmm0[4,4,5,5,6,6,7,7] +; ALL-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_31206745 +; ALL-LABEL: shuffle_v8i16_31206745: ; ALL: # BB#0: -; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,1,2,0,4,5,6,7] -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,3,2] +; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] +; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_44440000 +; SSE2-LABEL: shuffle_v8i16_44440000: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,0,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_44440000 +; SSSE3-LABEL: shuffle_v8i16_44440000: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_44440000: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_23016745 +; ALL-LABEL: shuffle_v8i16_23016745: ; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,0,3,2] +; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_23026745 +; ALL-LABEL: shuffle_v8i16_23026745: ; ALL: # BB#0: -; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,3,0,2,4,5,6,7] -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,3,2] +; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] +; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_23016747 +; ALL-LABEL: shuffle_v8i16_23016747: ; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,0,2,3] -; ALL-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,4,7] +; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3] +; ALL-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_75643120 +; SSE2-LABEL: shuffle_v8i16_75643120: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,0,1] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,1,2,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_75643120 +; SSSE3-LABEL: shuffle_v8i16_75643120: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_75643120: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_10545410 +; SSE2-LABEL: shuffle_v8i16_10545410: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,0,3,2,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,4,7,6] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_10545410 +; SSSE3-LABEL: shuffle_v8i16_10545410: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_10545410: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_54105410 +; SSE2-LABEL: shuffle_v8i16_54105410: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,4,7,6] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_54105410 +; SSSE3-LABEL: shuffle_v8i16_54105410: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_54105410: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_54101054 +; SSE2-LABEL: shuffle_v8i16_54101054: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_54101054 +; SSSE3-LABEL: shuffle_v8i16_54101054: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_54101054: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_04400440 +; SSE2-LABEL: shuffle_v8i16_04400440: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,4,4,6] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_04400440 +; SSSE3-LABEL: shuffle_v8i16_04400440: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_04400440: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_40044004 +; SSE2-LABEL: shuffle_v8i16_40044004: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,0,0,2,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_40044004 +; SSSE3-LABEL: shuffle_v8i16_40044004: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_40044004: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_26405173 +; SSE2-LABEL: shuffle_v8i16_26405173: ; SSE2: # BB#0: -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,4] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,1] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,2,3,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,6,4,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_26405173 +; SSSE3-LABEL: shuffle_v8i16_26405173: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_26405173: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_20645173 +; SSE2-LABEL: shuffle_v8i16_20645173: ; SSE2: # BB#0: -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,4] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,1] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,6,4,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_20645173 +; SSSE3-LABEL: shuffle_v8i16_20645173: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_20645173: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_26401375 +; SSE2-LABEL: shuffle_v8i16_26401375: ; SSE2: # BB#0: -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,4] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,1,2] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,2,3,0,4,5,6,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_26401375 +; SSSE3-LABEL: shuffle_v8i16_26401375: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_26401375: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_66751643 +; SSE2-LABEL: shuffle_v8i16_66751643: ; SSE2: # BB#0: -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,1,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,5,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,2,0] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,1,3,2,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_66751643 +; SSSE3-LABEL: shuffle_v8i16_66751643: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_66751643: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_60514754 +; SSE2-LABEL: shuffle_v8i16_60514754: ; SSE2: # BB#0: -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,5,4,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,0,3,1,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,5,6] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_60514754 +; SSSE3-LABEL: shuffle_v8i16_60514754: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_60514754: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_00444444 +; SSE2-LABEL: shuffle_v8i16_00444444: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_00444444 +; SSSE3-LABEL: shuffle_v8i16_00444444: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_00444444: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_44004444 +; SSE2-LABEL: shuffle_v8i16_44004444: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,2,0,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_44004444 +; SSSE3-LABEL: shuffle_v8i16_44004444: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_44004444: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_04404444 +; SSE2-LABEL: shuffle_v8i16_04404444: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_04404444 +; SSSE3-LABEL: shuffle_v8i16_04404444: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_04404444: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_04400000 +; SSE2-LABEL: shuffle_v8i16_04400000: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,0,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_04400000 +; SSSE3-LABEL: shuffle_v8i16_04400000: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_04400000: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_04404567 +; ALL-LABEL: shuffle_v8i16_04404567: ; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7] +; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_0X444444 +; SSE2-LABEL: shuffle_v8i16_0X444444: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,2,2,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_0X444444 +; SSSE3-LABEL: shuffle_v8i16_0X444444: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,{{[0-9]+,[0-9]+}},8,9,8,9,8,9,8,9,8,9,8,9] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_0X444444: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_44X04444 +; SSE2-LABEL: shuffle_v8i16_44X04444: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,2,2,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_44X04444 +; SSSE3-LABEL: shuffle_v8i16_44X04444: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,8,9,{{[0-9]+,[0-9]+}},0,1,8,9,8,9,8,9,8,9] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_44X04444: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_X4404444 +; SSE2-LABEL: shuffle_v8i16_X4404444: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_X4404444 +; SSSE3-LABEL: shuffle_v8i16_X4404444: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+}},8,9,8,9,0,1,8,9,8,9,8,9,8,9] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_X4404444: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_0127XXXX +; SSE2-LABEL: shuffle_v8i16_0127XXXX: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_0127XXXX +; SSSE3-LABEL: shuffle_v8i16_0127XXXX: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,2,3,4,5,14,15,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_0127XXXX: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_XXXX4563 +; SSE2-LABEL: shuffle_v8i16_XXXX4563: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,0] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_XXXX4563 +; SSSE3-LABEL: shuffle_v8i16_XXXX4563: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}},8,9,10,11,12,13,6,7] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_XXXX4563: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_4563XXXX +; SSE2-LABEL: shuffle_v8i16_4563XXXX: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,0,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_4563XXXX +; SSSE3-LABEL: shuffle_v8i16_4563XXXX: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,10,11,12,13,6,7,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_4563XXXX: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_01274563 +; SSE2-LABEL: shuffle_v8i16_01274563: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,5,4,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,1,2] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_01274563 +; SSSE3-LABEL: shuffle_v8i16_01274563: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_01274563: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_45630127 +; SSE2-LABEL: shuffle_v8i16_45630127: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,1,4,5,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,0,3,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_45630127 +; SSSE3-LABEL: shuffle_v8i16_45630127: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_45630127: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_37102735 +; SSE2-LABEL: shuffle_v8i16_37102735: ; SSE2: # BB#0: -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,5,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,4] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,4,5,6] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_37102735 +; SSSE3-LABEL: shuffle_v8i16_37102735: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_37102735: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_08192a3b +; ALL-LABEL: shuffle_v8i16_08192a3b: ; ALL: # BB#0: -; ALL-NEXT: punpcklwd %xmm1, %xmm0 +; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_0c1d2e3f +; ALL-LABEL: shuffle_v8i16_0c1d2e3f: ; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,0,1] -; ALL-NEXT: punpcklwd %xmm1, %xmm0 +; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_4c5d6e7f +; ALL-LABEL: shuffle_v8i16_4c5d6e7f: ; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,0,1] -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,0,1] -; ALL-NEXT: punpcklwd %xmm1, %xmm0 +; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_48596a7b +; ALL-LABEL: shuffle_v8i16_48596a7b: ; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,0,1] -; ALL-NEXT: punpcklwd %xmm1, %xmm0 +; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_08196e7f +; ALL-LABEL: shuffle_v8i16_08196e7f: ; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,3,2,3] -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,3] -; ALL-NEXT: punpcklwd %xmm1, %xmm0 +; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3] +; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] +; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_0c1d6879 +; ALL-LABEL: shuffle_v8i16_0c1d6879: ; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,0,2,3] -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,3] -; ALL-NEXT: punpcklwd %xmm1, %xmm0 +; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3] +; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] +; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_109832ba +; ALL-LABEL: shuffle_v8i16_109832ba: ; ALL: # BB#0: -; ALL-NEXT: punpcklwd %xmm1, %xmm0 -; ALL-NEXT: pshuflw {{.*}} # xmm1 = xmm0[2,0,3,1,4,5,6,7] -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,2,3] -; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,0,3,1,4,5,6,7] -; ALL-NEXT: punpcklqdq %xmm0, %xmm1 +; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; ALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[2,0,3,1,4,5,6,7] +; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] +; ALL-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; ALL-NEXT: movdqa %xmm1, %xmm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -595,235 +732,300 @@ define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) { } define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_8091a2b3 +; ALL-LABEL: shuffle_v8i16_8091a2b3: ; ALL: # BB#0: -; ALL-NEXT: punpcklwd %xmm0, %xmm1 +; ALL-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; ALL-NEXT: movdqa %xmm1, %xmm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_c4d5e6f7 +; ALL-LABEL: shuffle_v8i16_c4d5e6f7: ; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*}} # xmm2 = xmm0[2,3,0,1] -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm1[2,3,0,1] -; ALL-NEXT: punpcklwd %xmm2, %xmm0 +; ALL-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_0213cedf +; ALL-LABEL: shuffle_v8i16_0213cedf: ; ALL: # BB#0: -; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7] -; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,2,3] -; ALL-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,1,3,4,5,6,7] -; ALL-NEXT: punpcklqdq %xmm1, %xmm0 +; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] +; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; ALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] +; ALL-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_443aXXXX +; SSE2-LABEL: shuffle_v8i16_443aXXXX: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,3,4,5,6,7] -; SSE2-NEXT: punpcklwd %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,5,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_443aXXXX +; SSSE3-LABEL: shuffle_v8i16_443aXXXX: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3] -; SSSE3-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,3,4,5,6,7] -; SSSE3-NEXT: punpcklwd %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,4,5,12,13,10,11,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}] +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,12,13,10,11,12,13,10,11,12,13,14,15] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_443aXXXX: +; SSE41: # BB#0: +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] +; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,12,13,10,11,12,13,10,11,12,13,14,15] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_032dXXXX +; SSE2-LABEL: shuffle_v8i16_032dXXXX: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,0,1] -; SSE2-NEXT: punpcklwd %xmm1, %xmm0 -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,1,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_032dXXXX +; SSSE3-LABEL: shuffle_v8i16_032dXXXX: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,0,1] -; SSSE3-NEXT: punpcklwd %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,12,13,8,9,6,7,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}] +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,8,9,6,7,8,9,12,13,12,13,14,15] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_032dXXXX: +; SSE41: # BB#0: +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,8,9,6,7,8,9,12,13,12,13,14,15] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) { -; ALL-LABEL: @shuffle_v8i16_XXXdXXXX +; ALL-LABEL: shuffle_v8i16_XXXdXXXX: ; ALL: # BB#0: -; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm1[2,2,3,3] +; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_012dXXXX +; SSE2-LABEL: shuffle_v8i16_012dXXXX: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,0,1] -; SSE2-NEXT: punpcklwd %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,2,0,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,0,3,4,5,6,7] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_012dXXXX +; SSSE3-LABEL: shuffle_v8i16_012dXXXX: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,0,1] -; SSSE3-NEXT: punpcklwd %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,4,5,8,9,6,7,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}] +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_012dXXXX: +; SSE41: # BB#0: +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_XXXXcde3 +; SSE2-LABEL: shuffle_v8i16_XXXXcde3: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1] -; SSE2-NEXT: punpckhwd %xmm0, %xmm1 -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,2] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,2] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_XXXXcde3 +; SSSE3-LABEL: shuffle_v8i16_XXXXcde3: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1] -; SSSE3-NEXT: punpckhwd %xmm0, %xmm1 # xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSSE3-NEXT: pshufb {{.*}} # xmm1 = xmm1[{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}},0,1,4,5,8,9,14,15] +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_XXXXcde3: +; SSE41: # BB#0: +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_cde3XXXX +; SSE2-LABEL: shuffle_v8i16_cde3XXXX: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1] -; SSE2-NEXT: punpckhwd %xmm0, %xmm1 -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_cde3XXXX +; SSSE3-LABEL: shuffle_v8i16_cde3XXXX: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1] -; SSSE3-NEXT: punpckhwd %xmm0, %xmm1 # xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSSE3-NEXT: pshufb {{.*}} # xmm1 = xmm1[0,1,4,5,8,9,14,15,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}] +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_cde3XXXX: +; SSE41: # BB#0: +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_012dcde3 +; SSE2-LABEL: shuffle_v8i16_012dcde3: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm2 = xmm0[0,1,0,1] -; SSE2-NEXT: pshufd {{.*}} # xmm3 = xmm1[2,3,0,1] -; SSE2-NEXT: punpckhwd %xmm2, %xmm1 -; SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,7,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,2,2,3] -; SSE2-NEXT: punpcklwd %xmm3, %xmm0 -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,2,0,3,4,5,6,7] -; SSE2-NEXT: punpcklqdq %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,0,3,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_012dcde3 +; SSSE3-LABEL: shuffle_v8i16_012dcde3: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufd {{.*}} # xmm2 = xmm0[0,1,0,1] -; SSSE3-NEXT: pshufd {{.*}} # xmm3 = xmm1[2,3,0,1] -; SSSE3-NEXT: punpckhwd %xmm2, %xmm1 # xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSSE3-NEXT: pshufb {{.*}} # xmm1 = xmm1[0,1,4,5,8,9,14,15,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}] -; SSSE3-NEXT: punpcklwd %xmm3, %xmm0 # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,4,5,8,9,6,7,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}] -; SSSE3-NEXT: punpcklqdq %xmm1, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1] +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_012dcde3: +; SSE41: # BB#0: +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] +; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15] +; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3] +; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_XXX1X579 +; SSE2-LABEL: shuffle_v8i16_XXX1X579: ; SSE2: # BB#0: -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,3,2,4,5,6,7] -; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,2,2,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,5,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,5,7] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_XXX1X579 +; SSSE3-LABEL: shuffle_v8i16_XXX1X579: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+}},2,3,10,11,14,15,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}] -; SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}},4,5,{{[0-9]+,[0-9]+}},8,9,12,13,6,7] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,10,11,14,15,14,15,10,11,12,13,14,15] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,5,8,9,8,9,12,13,6,7] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_XXX1X579: +; SSE41: # BB#0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,10,11,14,15,14,15,10,11,12,13,14,15] +; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,5,8,9,8,9,12,13,6,7] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) { -; SSE2-LABEL: @shuffle_v8i16_XX4X8acX +; SSE2-LABEL: shuffle_v8i16_XX4X8acX: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,0,1] -; SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,2,2,3] -; SSE2-NEXT: punpcklwd {{.*}} # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,1,2,0,4,5,6,7] -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1] -; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,1,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,4,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,1,2,0,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,4,7] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_XX4X8acX +; SSSE3-LABEL: shuffle_v8i16_XX4X8acX: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufd {{.*}} # [[X:xmm[0-9]+]] = xmm0[2,3,0,1] -; SSSE3-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7] -; SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3] -; SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1],xmm0[2],[[X]][2],xmm0[3],[[X]][3] -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+,[0-9]+,[0-9]+}},2,3,{{[0-9]+,[0-9]+}},0,1,4,5,8,9,{{[0-9]+,[0-9]+}}] +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7] +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,4,5,8,9,0,1] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_XX4X8acX: +; SSE41: # BB#0: +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,4,5,8,9,0,1] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) { -; ALL-LABEL: @shuffle_v8i16_8zzzzzzz +; ALL-LABEL: shuffle_v8i16_8zzzzzzz: ; ALL: # BB#0: -; ALL-NEXT: movzwl {{.*}}, %[[R:.*]] -; ALL-NEXT: movd %[[R]], %xmm0 +; ALL-NEXT: movzwl %di, %eax +; ALL-NEXT: movd %eax, %xmm0 ; ALL-NEXT: retq %a = insertelement <8 x i16> undef, i16 %i, i32 0 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> @@ -831,10 +1033,10 @@ define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) { } define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) { -; ALL-LABEL: @shuffle_v8i16_z8zzzzzz +; ALL-LABEL: shuffle_v8i16_z8zzzzzz: ; ALL: # BB#0: -; ALL-NEXT: movzwl {{.*}}, %[[R:.*]] -; ALL-NEXT: movd %[[R]], %xmm0 +; ALL-NEXT: movzwl %di, %eax +; ALL-NEXT: movd %eax, %xmm0 ; ALL-NEXT: pslldq $2, %xmm0 ; ALL-NEXT: retq %a = insertelement <8 x i16> undef, i16 %i, i32 0 @@ -843,10 +1045,10 @@ define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) { } define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) { -; ALL-LABEL: @shuffle_v8i16_zzzzz8zz +; ALL-LABEL: shuffle_v8i16_zzzzz8zz: ; ALL: # BB#0: -; ALL-NEXT: movzwl {{.*}}, %[[R:.*]] -; ALL-NEXT: movd %[[R]], %xmm0 +; ALL-NEXT: movzwl %di, %eax +; ALL-NEXT: movd %eax, %xmm0 ; ALL-NEXT: pslldq $10, %xmm0 ; ALL-NEXT: retq %a = insertelement <8 x i16> undef, i16 %i, i32 0 @@ -855,10 +1057,10 @@ define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) { } define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) { -; ALL-LABEL: @shuffle_v8i16_zuuzuuz8 +; ALL-LABEL: shuffle_v8i16_zuuzuuz8: ; ALL: # BB#0: -; ALL-NEXT: movzwl {{.*}}, %[[R:.*]] -; ALL-NEXT: movd %[[R]], %xmm0 +; ALL-NEXT: movzwl %di, %eax +; ALL-NEXT: movd %eax, %xmm0 ; ALL-NEXT: pslldq $14, %xmm0 ; ALL-NEXT: retq %a = insertelement <8 x i16> undef, i16 %i, i32 0 @@ -867,10 +1069,10 @@ define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) { } define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) { -; ALL-LABEL: @shuffle_v8i16_zzBzzzzz +; ALL-LABEL: shuffle_v8i16_zzBzzzzz: ; ALL: # BB#0: -; ALL-NEXT: movzwl {{.*}}, %[[R:.*]] -; ALL-NEXT: movd %[[R]], %xmm0 +; ALL-NEXT: movzwl %di, %eax +; ALL-NEXT: movd %eax, %xmm0 ; ALL-NEXT: pslldq $4, %xmm0 ; ALL-NEXT: retq %a = insertelement <8 x i16> undef, i16 %i, i32 3 @@ -879,149 +1081,368 @@ define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) { } define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) { -; SSSE3-LABEL: @shuffle_v8i16_def01234 +; SSE2-LABEL: shuffle_v8i16_def01234: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,3,1,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_def01234: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_def01234: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) { -; SSSE3-LABEL: @shuffle_v8i16_ueuu123u +; SSE2-LABEL: shuffle_v8i16_ueuu123u: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,5,7] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_ueuu123u: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_ueuu123u: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) { -; SSSE3-LABEL: @shuffle_v8i16_56701234 +; SSE2-LABEL: shuffle_v8i16_56701234: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_56701234: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_56701234: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) { -; SSSE3-LABEL: @shuffle_v8i16_u6uu123u +; SSE2-LABEL: shuffle_v8i16_u6uu123u: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,0,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,7] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_u6uu123u: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_u6uu123u: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) { -; SSSE3-LABEL: @shuffle_v8i16_uuuu123u +; SSE2-LABEL: shuffle_v8i16_uuuu123u: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,7] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_uuuu123u: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_uuuu123u: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) { -; SSSE3-LABEL: @shuffle_v8i16_bcdef012 +; SSE2-LABEL: shuffle_v8i16_bcdef012: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[3,0,1,2,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_bcdef012: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_bcdef012: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) { -; SSSE3-LABEL: @shuffle_v8i16_ucdeuu1u +; SSE2-LABEL: shuffle_v8i16_ucdeuu1u: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_ucdeuu1u: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) { -; SSSE3-LABEL: @shuffle_v8i16_34567012 +; SSE2-LABEL: shuffle_v8i16_34567012: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_34567012: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_34567012: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) { -; SSSE3-LABEL: @shuffle_v8i16_u456uu1u +; SSE2-LABEL: shuffle_v8i16_u456uu1u: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_u456uu1u: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_u456uu1u: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) { -; SSSE3-LABEL: @shuffle_v8i16_u456uuuu +; SSE2-LABEL: shuffle_v8i16_u456uuuu: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_u456uuuu: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] +; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_u456uuuu: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) { -; SSSE3-LABEL: @shuffle_v8i16_3456789a +; SSE2-LABEL: shuffle_v8i16_3456789a: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,0,1,2,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_3456789a: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $6, {{.*}} # xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] +; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_3456789a: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) { -; SSSE3-LABEL: @shuffle_v8i16_u456uu9u +; SSE2-LABEL: shuffle_v8i16_u456uu9u: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_u456uu9u: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $6, {{.*}} # xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] +; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_u456uu9u: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) { -; SSSE3-LABEL: @shuffle_v8i16_56789abc +; SSE2-LABEL: shuffle_v8i16_56789abc: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,3,1,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,2,3,0,4,5,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_56789abc: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $10, {{.*}} # xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] +; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_56789abc: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) { -; SSSE3-LABEL: @shuffle_v8i16_u6uu9abu +; SSE2-LABEL: shuffle_v8i16_u6uu9abu: +; SSE2: # BB#0: +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,5,7] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v8i16_u6uu9abu: ; SSSE3: # BB#0: -; SSSE3-NEXT: palignr $10, {{.*}} # xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] +; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v8i16_u6uu9abu: +; SSE41: # BB#0: +; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) { -; SSE2-LABEL: @shuffle_v8i16_0uuu1uuu +; SSE2-LABEL: shuffle_v8i16_0uuu1uuu: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3] -; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_0uuu1uuu +; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu: ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3] -; SSSE3-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v8i16_0uuu1uuu +; SSE41-LABEL: shuffle_v8i16_0uuu1uuu: ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxwq %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -1030,21 +1451,21 @@ define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) { } define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) { -; SSE2-LABEL: @shuffle_v8i16_0zzz1zzz +; SSE2-LABEL: shuffle_v8i16_0zzz1zzz: ; SSE2: # BB#0: -; SSE2-NEXT: pxor %[[X1:xmm[0-9]+]], %[[X1]] -; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3] -; SSE2-NEXT: punpckldq {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1] +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_0zzz1zzz +; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz: ; SSSE3: # BB#0: -; SSSE3-NEXT: pxor %[[X1:xmm[0-9]+]], %[[X1]] -; SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3] -; SSSE3-NEXT: punpckldq {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1] +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v8i16_0zzz1zzz +; SSE41-LABEL: shuffle_v8i16_0zzz1zzz: ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxwq %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -1053,17 +1474,17 @@ define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) { } define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) { -; SSE2-LABEL: @shuffle_v8i16_0u1u2u3u +; SSE2-LABEL: shuffle_v8i16_0u1u2u3u: ; SSE2: # BB#0: -; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_0u1u2u3u +; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u: ; SSSE3: # BB#0: -; SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v8i16_0u1u2u3u +; SSE41-LABEL: shuffle_v8i16_0u1u2u3u: ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxwd %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -1072,19 +1493,19 @@ define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) { } define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) { -; SSE2-LABEL: @shuffle_v8i16_0z1z2z3z +; SSE2-LABEL: shuffle_v8i16_0z1z2z3z: ; SSE2: # BB#0: -; SSE2-NEXT: pxor %[[X:xmm[0-9]+]], %[[X]] -; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1],xmm0[2],[[X]][2],xmm0[3],[[X]][3] +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: @shuffle_v8i16_0z1z2z3z +; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z: ; SSSE3: # BB#0: -; SSSE3-NEXT: pxor %[[X:xmm[0-9]+]], %[[X]] -; SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1],xmm0[2],[[X]][2],xmm0[3],[[X]][3] +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: @shuffle_v8i16_0z1z2z3z +; SSE41-LABEL: shuffle_v8i16_0z1z2z3z: ; SSE41: # BB#0: ; SSE41-NEXT: pmovzxwd %xmm0, %xmm0 ; SSE41-NEXT: retq