diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 86cf68ae7c9..c9d73d74e84 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -2991,6 +2991,46 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_i64_ty], [IntrNoMem]>; } +// Pack ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx512_mask_packsswb_128 : GCCBuiltin<"__builtin_ia32_packsswb128_mask">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_packsswb_256 : GCCBuiltin<"__builtin_ia32_packsswb256_mask">, + Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,llvm_v16i16_ty, + llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_packsswb_512 : GCCBuiltin<"__builtin_ia32_packsswb512_mask">, + Intrinsic<[llvm_v64i8_ty], [llvm_v32i16_ty,llvm_v32i16_ty, + llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_packssdw_128 : GCCBuiltin<"__builtin_ia32_packssdw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_packssdw_256 : GCCBuiltin<"__builtin_ia32_packssdw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_packssdw_512 : GCCBuiltin<"__builtin_ia32_packssdw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_packuswb_128 : GCCBuiltin<"__builtin_ia32_packuswb128_mask">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_packuswb_256 : GCCBuiltin<"__builtin_ia32_packuswb256_mask">, + Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,llvm_v16i16_ty, + llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_packuswb_512 : GCCBuiltin<"__builtin_ia32_packuswb512_mask">, + Intrinsic<[llvm_v64i8_ty], [llvm_v32i16_ty,llvm_v32i16_ty, + llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_packusdw_128 : GCCBuiltin<"__builtin_ia32_packusdw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_packusdw_256 : GCCBuiltin<"__builtin_ia32_packusdw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_packusdw_512 : GCCBuiltin<"__builtin_ia32_packusdw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; +} + // Vector convert let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_cvttps2dq_512: GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 85bdfa7d7af..8a9bd78e6a6 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3118,6 +3118,77 @@ defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P, defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P, X86pmuludq, 1>; +multiclass avx512_packs_rmb opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _Src, X86VectorVTInfo _Dst> { + let mayLoad = 1 in { + defm rmb : AVX512_maskable, + EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>; + } +} + +multiclass avx512_packs_rm opc, string OpcodeStr, + SDNode OpNode,X86VectorVTInfo _Src, + X86VectorVTInfo _Dst> { + defm rr : AVX512_maskable, EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V; + let mayLoad = 1 in { + defm rm : AVX512_maskable, EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>; + } +} + +multiclass avx512_packs_all_i32_i16 opc, string OpcodeStr, + SDNode OpNode> { + defm NAME#Z : avx512_packs_rm, + avx512_packs_rmb, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#Z256 : avx512_packs_rm, + avx512_packs_rmb, EVEX_V256; + defm NAME#Z128 : avx512_packs_rm, + avx512_packs_rmb, EVEX_V128; + } +} +multiclass avx512_packs_all_i16_i8 opc, string OpcodeStr, + SDNode OpNode> { + defm NAME#Z : avx512_packs_rm, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#Z256 : avx512_packs_rm, EVEX_V256; + defm NAME#Z128 : avx512_packs_rm, EVEX_V128; + } +} +let Predicates = [HasBWI] in { + defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, PD; + defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, T8PD; + defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase, VEX_W; + defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W; +} + defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax, diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 6944e560210..e633303334e 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -353,6 +353,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_or_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), X86_INTRINSIC_DATA(avx512_mask_or_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), X86_INTRINSIC_DATA(avx512_mask_or_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), + X86_INTRINSIC_DATA(avx512_mask_packssdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), + X86_INTRINSIC_DATA(avx512_mask_packssdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), + X86_INTRINSIC_DATA(avx512_mask_packssdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), + X86_INTRINSIC_DATA(avx512_mask_packsswb_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), + X86_INTRINSIC_DATA(avx512_mask_packsswb_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), + X86_INTRINSIC_DATA(avx512_mask_packsswb_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), + X86_INTRINSIC_DATA(avx512_mask_packusdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0), + X86_INTRINSIC_DATA(avx512_mask_packusdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0), + X86_INTRINSIC_DATA(avx512_mask_packusdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0), + X86_INTRINSIC_DATA(avx512_mask_packuswb_128, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0), + X86_INTRINSIC_DATA(avx512_mask_packuswb_256, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0), + X86_INTRINSIC_DATA(avx512_mask_packuswb_512, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0), X86_INTRINSIC_DATA(avx512_mask_padd_b_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0), X86_INTRINSIC_DATA(avx512_mask_padd_b_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0), X86_INTRINSIC_DATA(avx512_mask_padd_b_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0), diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index 308de16aefd..0006efd091a 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -351,3 +351,252 @@ define <8 x i16> @test_x86_mask_blend_w_128(i8 %mask, <8 x i16> %a1, <8 x i16> % ret <8 x i16> %res } declare <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16>, <8 x i16>, i8) nounwind readonly + +define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { + ;CHECK-LABEL: test_mask_packs_epi32_rr_512 + ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1] + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rrk_512 + ;CHECK: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1] + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rrkz_512 + ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1] + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_packs_epi32_rm_512 + ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07] + %b = load <16 x i32>, <16 x i32>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rmk_512 + ;CHECK: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f] + %b = load <16 x i32>, <16 x i32>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rmkz_512 + ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07] + %b = load <16 x i32>, <16 x i32>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_packs_epi32_rmb_512 + ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rmbk_512 + ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_512 + ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) + +define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { + ;CHECK-LABEL: test_mask_packs_epi16_rr_512 + ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0xc1] + %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { + ;CHECK-LABEL: test_mask_packs_epi16_rrk_512 + ;CHECK: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0xd1] + %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { + ;CHECK-LABEL: test_mask_packs_epi16_rrkz_512 + ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0xc1] + %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_packs_epi16_rm_512 + ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0x07] + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { + ;CHECK-LABEL: test_mask_packs_epi16_rmk_512 + ;CHECK: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0x0f] + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { + ;CHECK-LABEL: test_mask_packs_epi16_rmkz_512 + ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0x07] + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) + + +define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { + ;CHECK-LABEL: test_mask_packus_epi32_rr_512 + ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0 + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rrk_512 + ;CHECK: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rrkz_512 + ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_packus_epi32_rm_512 + ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0 + %b = load <16 x i32>, <16 x i32>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rmk_512 + ;CHECK: vpackusdw (%rdi), %zmm0, %zmm1 {%k1} + %b = load <16 x i32>, <16 x i32>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rmkz_512 + ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} + %b = load <16 x i32>, <16 x i32>* %ptr_b + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_packus_epi32_rmb_512 + ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rmbk_512 + ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_512 + ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) + +define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { + ;CHECK-LABEL: test_mask_packus_epi16_rr_512 + ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0 + %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { + ;CHECK-LABEL: test_mask_packus_epi16_rrk_512 + ;CHECK: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} + %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { + ;CHECK-LABEL: test_mask_packus_epi16_rrkz_512 + ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} + %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_packus_epi16_rm_512 + ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0 + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { + ;CHECK-LABEL: test_mask_packus_epi16_rmk_512 + ;CHECK: vpackuswb (%rdi), %zmm0, %zmm1 {%k1} + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { + ;CHECK-LABEL: test_mask_packus_epi16_rmkz_512 + ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} + %b = load <32 x i16>, <32 x i16>* %ptr_b + %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) \ No newline at end of file diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 03a0466ab19..0834bc6afc6 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -1419,3 +1419,501 @@ define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %pt declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) + +define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { + ;CHECK-LABEL: test_mask_packs_epi32_rr_128 + ;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0xc1] + %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rrk_128 + ;CHECK: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1] + %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rrkz_128 + ;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1] + %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_packs_epi32_rm_128 + ;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0x07] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rmk_128 + ;CHECK: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rmkz_128 + ;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07] + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_packs_epi32_rmb_128 + ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rmbk_128 + ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_128 + ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8) + +define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { + ;CHECK-LABEL: test_mask_packs_epi32_rr_256 + ;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0xc1] + %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rrk_256 + ;CHECK: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1] + %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rrkz_256 + ;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1] + %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_packs_epi32_rm_256 + ;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0x07] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rmk_256 + ;CHECK: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rmkz_256 + ;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07] + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_packs_epi32_rmb_256 + ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rmbk_256 + ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_256 + ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07] + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16) + +define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { + ;CHECK-LABEL: test_mask_packs_epi16_rr_128 + ;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc1] + %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_packs_epi16_rrk_128 + ;CHECK: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x63,0xd1] + %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_packs_epi16_rrkz_128 + ;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x63,0xc1] + %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_packs_epi16_rm_128 + ;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0x07] + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_packs_epi16_rmk_128 + ;CHECK: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x63,0x0f] + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_packs_epi16_rmkz_128 + ;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x63,0x07] + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) + ret <16 x i8> %res +} + +declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16) + +define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { + ;CHECK-LABEL: test_mask_packs_epi16_rr_256 + ;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0xc1] + %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_packs_epi16_rrk_256 + ;CHECK: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x63,0xd1] + %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_packs_epi16_rrkz_256 + ;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x63,0xc1] + %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_packs_epi16_rm_256 + ;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0x07] + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_packs_epi16_rmk_256 + ;CHECK: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x63,0x0f] + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_packs_epi16_rmkz_256 + ;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x63,0x07] + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) + ret <32 x i8> %res +} + +declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32) + + +define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { + ;CHECK-LABEL: test_mask_packus_epi32_rr_128 + ;CHECK: vpackusdw %xmm1, %xmm0, %xmm0 + %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rrk_128 + ;CHECK: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} + %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rrkz_128 + ;CHECK: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} + %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_packus_epi32_rm_128 + ;CHECK: vpackusdw (%rdi), %xmm0, %xmm0 + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rmk_128 + ;CHECK: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rmkz_128 + ;CHECK: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} + %b = load <4 x i32>, <4 x i32>* %ptr_b + %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_packus_epi32_rmb_128 + ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rmbk_128 + ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_128 + ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 + %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer + %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8) + +define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { + ;CHECK-LABEL: test_mask_packus_epi32_rr_256 + ;CHECK: vpackusdw %ymm1, %ymm0, %ymm0 + %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rrk_256 + ;CHECK: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} + %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rrkz_256 + ;CHECK: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} + %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { + ;CHECK-LABEL: test_mask_packus_epi32_rm_256 + ;CHECK: vpackusdw (%rdi), %ymm0, %ymm0 + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rmk_256 + ;CHECK: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rmkz_256 + ;CHECK: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} + %b = load <8 x i32>, <8 x i32>* %ptr_b + %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { + ;CHECK-LABEL: test_mask_packus_epi32_rmb_256 + ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rmbk_256 + ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_256 + ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} + %q = load i32, i32* %ptr_b + %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 + %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer + %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16) + +define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { + ;CHECK-LABEL: test_mask_packus_epi16_rr_128 + ;CHECK: vpackuswb %xmm1, %xmm0, %xmm0 + %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_packus_epi16_rrk_128 + ;CHECK: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} + %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { + ;CHECK-LABEL: test_mask_packus_epi16_rrkz_128 + ;CHECK: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} + %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_packus_epi16_rm_128 + ;CHECK: vpackuswb (%rdi), %xmm0, %xmm0 + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { + ;CHECK-LABEL: test_mask_packus_epi16_rmk_128 + ;CHECK: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { + ;CHECK-LABEL: test_mask_packus_epi16_rmkz_128 + ;CHECK: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} + %b = load <8 x i16>, <8 x i16>* %ptr_b + %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) + ret <16 x i8> %res +} + +declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16) + +define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { + ;CHECK-LABEL: test_mask_packus_epi16_rr_256 + ;CHECK: vpackuswb %ymm1, %ymm0, %ymm0 + %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_packus_epi16_rrk_256 + ;CHECK: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} + %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) { + ;CHECK-LABEL: test_mask_packus_epi16_rrkz_256 + ;CHECK: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} + %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { + ;CHECK-LABEL: test_mask_packus_epi16_rm_256 + ;CHECK: vpackuswb (%rdi), %ymm0, %ymm0 + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) { + ;CHECK-LABEL: test_mask_packus_epi16_rmk_256 + ;CHECK: vpackuswb (%rdi), %ymm0, %ymm1 {%k1} + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) { + ;CHECK-LABEL: test_mask_packus_epi16_rmkz_256 + ;CHECK: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} + %b = load <16 x i16>, <16 x i16>* %ptr_b + %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) + ret <32 x i8> %res +} + +declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32) + diff --git a/test/MC/X86/x86-64-avx512bw.s b/test/MC/X86/x86-64-avx512bw.s index 0d055b1cecd..3f4855a66a7 100644 --- a/test/MC/X86/x86-64-avx512bw.s +++ b/test/MC/X86/x86-64-avx512bw.s @@ -2008,3 +2008,330 @@ // CHECK: encoding: [0x62,0xe2,0x95,0x40,0x11,0x9a,0xc0,0xdf,0xff,0xff] vpsravw -8256(%rdx), %zmm29, %zmm19 +// CHECK: vpacksswb %zmm18, %zmm29, %zmm21 +// CHECK: encoding: [0x62,0xa1,0x95,0x40,0x63,0xea] + vpacksswb %zmm18, %zmm29, %zmm21 + +// CHECK: vpacksswb %zmm18, %zmm29, %zmm21 {%k1} +// CHECK: encoding: [0x62,0xa1,0x95,0x41,0x63,0xea] + vpacksswb %zmm18, %zmm29, %zmm21 {%k1} + +// CHECK: vpacksswb %zmm18, %zmm29, %zmm21 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0x95,0xc1,0x63,0xea] + vpacksswb %zmm18, %zmm29, %zmm21 {%k1} {z} + +// CHECK: vpacksswb (%rcx), %zmm29, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x63,0x29] + vpacksswb (%rcx), %zmm29, %zmm21 + +// CHECK: vpacksswb 291(%rax,%r14,8), %zmm29, %zmm21 +// CHECK: encoding: [0x62,0xa1,0x95,0x40,0x63,0xac,0xf0,0x23,0x01,0x00,0x00] + vpacksswb 291(%rax,%r14,8), %zmm29, %zmm21 + +// CHECK: vpacksswb 8128(%rdx), %zmm29, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x63,0x6a,0x7f] + vpacksswb 8128(%rdx), %zmm29, %zmm21 + +// CHECK: vpacksswb 8192(%rdx), %zmm29, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x63,0xaa,0x00,0x20,0x00,0x00] + vpacksswb 8192(%rdx), %zmm29, %zmm21 + +// CHECK: vpacksswb -8192(%rdx), %zmm29, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x63,0x6a,0x80] + vpacksswb -8192(%rdx), %zmm29, %zmm21 + +// CHECK: vpacksswb -8256(%rdx), %zmm29, %zmm21 +// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x63,0xaa,0xc0,0xdf,0xff,0xff] + vpacksswb -8256(%rdx), %zmm29, %zmm21 + +// CHECK: vpackuswb %zmm28, %zmm24, %zmm28 +// CHECK: encoding: [0x62,0x01,0xbd,0x40,0x67,0xe4] + vpackuswb %zmm28, %zmm24, %zmm28 + +// CHECK: vpackuswb %zmm28, %zmm24, %zmm28 {%k6} +// CHECK: encoding: [0x62,0x01,0xbd,0x46,0x67,0xe4] + vpackuswb %zmm28, %zmm24, %zmm28 {%k6} + +// CHECK: vpackuswb %zmm28, %zmm24, %zmm28 {%k6} {z} +// CHECK: encoding: [0x62,0x01,0xbd,0xc6,0x67,0xe4] + vpackuswb %zmm28, %zmm24, %zmm28 {%k6} {z} + +// CHECK: vpackuswb (%rcx), %zmm24, %zmm28 +// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x67,0x21] + vpackuswb (%rcx), %zmm24, %zmm28 + +// CHECK: vpackuswb 291(%rax,%r14,8), %zmm24, %zmm28 +// CHECK: encoding: [0x62,0x21,0xbd,0x40,0x67,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpackuswb 291(%rax,%r14,8), %zmm24, %zmm28 + +// CHECK: vpackuswb 8128(%rdx), %zmm24, %zmm28 +// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x67,0x62,0x7f] + vpackuswb 8128(%rdx), %zmm24, %zmm28 + +// CHECK: vpackuswb 8192(%rdx), %zmm24, %zmm28 +// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x67,0xa2,0x00,0x20,0x00,0x00] + vpackuswb 8192(%rdx), %zmm24, %zmm28 + +// CHECK: vpackuswb -8192(%rdx), %zmm24, %zmm28 +// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x67,0x62,0x80] + vpackuswb -8192(%rdx), %zmm24, %zmm28 + +// CHECK: vpackuswb -8256(%rdx), %zmm24, %zmm28 +// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x67,0xa2,0xc0,0xdf,0xff,0xff] + vpackuswb -8256(%rdx), %zmm24, %zmm28 + +// CHECK: vpacksswb %zmm24, %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x01,0x95,0x40,0x63,0xd0] + vpacksswb %zmm24, %zmm29, %zmm26 + +// CHECK: vpacksswb %zmm24, %zmm29, %zmm26 {%k6} +// CHECK: encoding: [0x62,0x01,0x95,0x46,0x63,0xd0] + vpacksswb %zmm24, %zmm29, %zmm26 {%k6} + +// CHECK: vpacksswb %zmm24, %zmm29, %zmm26 {%k6} {z} +// CHECK: encoding: [0x62,0x01,0x95,0xc6,0x63,0xd0] + vpacksswb %zmm24, %zmm29, %zmm26 {%k6} {z} + +// CHECK: vpacksswb (%rcx), %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x40,0x63,0x11] + vpacksswb (%rcx), %zmm29, %zmm26 + +// CHECK: vpacksswb 4660(%rax,%r14,8), %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x21,0x95,0x40,0x63,0x94,0xf0,0x34,0x12,0x00,0x00] + vpacksswb 4660(%rax,%r14,8), %zmm29, %zmm26 + +// CHECK: vpacksswb 8128(%rdx), %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x40,0x63,0x52,0x7f] + vpacksswb 8128(%rdx), %zmm29, %zmm26 + +// CHECK: vpacksswb 8192(%rdx), %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x40,0x63,0x92,0x00,0x20,0x00,0x00] + vpacksswb 8192(%rdx), %zmm29, %zmm26 + +// CHECK: vpacksswb -8192(%rdx), %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x40,0x63,0x52,0x80] + vpacksswb -8192(%rdx), %zmm29, %zmm26 + +// CHECK: vpacksswb -8256(%rdx), %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x61,0x95,0x40,0x63,0x92,0xc0,0xdf,0xff,0xff] + vpacksswb -8256(%rdx), %zmm29, %zmm26 + +// CHECK: vpackuswb %zmm23, %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xa1,0x9d,0x40,0x67,0xd7] + vpackuswb %zmm23, %zmm28, %zmm18 + +// CHECK: vpackuswb %zmm23, %zmm28, %zmm18 {%k4} +// CHECK: encoding: [0x62,0xa1,0x9d,0x44,0x67,0xd7] + vpackuswb %zmm23, %zmm28, %zmm18 {%k4} + +// CHECK: vpackuswb %zmm23, %zmm28, %zmm18 {%k4} {z} +// CHECK: encoding: [0x62,0xa1,0x9d,0xc4,0x67,0xd7] + vpackuswb %zmm23, %zmm28, %zmm18 {%k4} {z} + +// CHECK: vpackuswb (%rcx), %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x67,0x11] + vpackuswb (%rcx), %zmm28, %zmm18 + +// CHECK: vpackuswb 4660(%rax,%r14,8), %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xa1,0x9d,0x40,0x67,0x94,0xf0,0x34,0x12,0x00,0x00] + vpackuswb 4660(%rax,%r14,8), %zmm28, %zmm18 + +// CHECK: vpackuswb 8128(%rdx), %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x67,0x52,0x7f] + vpackuswb 8128(%rdx), %zmm28, %zmm18 + +// CHECK: vpackuswb 8192(%rdx), %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x67,0x92,0x00,0x20,0x00,0x00] + vpackuswb 8192(%rdx), %zmm28, %zmm18 + +// CHECK: vpackuswb -8192(%rdx), %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x67,0x52,0x80] + vpackuswb -8192(%rdx), %zmm28, %zmm18 + +// CHECK: vpackuswb -8256(%rdx), %zmm28, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x67,0x92,0xc0,0xdf,0xff,0xff] + vpackuswb -8256(%rdx), %zmm28, %zmm18 + +// CHECK: vpackssdw %zmm22, %zmm27, %zmm24 +// CHECK: encoding: [0x62,0x21,0x25,0x40,0x6b,0xc6] + vpackssdw %zmm22, %zmm27, %zmm24 + +// CHECK: vpackssdw %zmm22, %zmm27, %zmm24 {%k2} +// CHECK: encoding: [0x62,0x21,0x25,0x42,0x6b,0xc6] + vpackssdw %zmm22, %zmm27, %zmm24 {%k2} + +// CHECK: vpackssdw %zmm22, %zmm27, %zmm24 {%k2} {z} +// CHECK: encoding: [0x62,0x21,0x25,0xc2,0x6b,0xc6] + vpackssdw %zmm22, %zmm27, %zmm24 {%k2} {z} + +// CHECK: vpackssdw (%rcx), %zmm27, %zmm24 +// CHECK: encoding: [0x62,0x61,0x25,0x40,0x6b,0x01] + vpackssdw (%rcx), %zmm27, %zmm24 + +// CHECK: vpackssdw 291(%rax,%r14,8), %zmm27, %zmm24 +// CHECK: encoding: [0x62,0x21,0x25,0x40,0x6b,0x84,0xf0,0x23,0x01,0x00,0x00] + vpackssdw 291(%rax,%r14,8), %zmm27, %zmm24 + +// CHECK: vpackssdw (%rcx){1to16}, %zmm27, %zmm24 +// CHECK: encoding: [0x62,0x61,0x25,0x50,0x6b,0x01] + vpackssdw (%rcx){1to16}, %zmm27, %zmm24 + +// CHECK: vpackssdw 8128(%rdx), %zmm27, %zmm24 +// CHECK: encoding: [0x62,0x61,0x25,0x40,0x6b,0x42,0x7f] + vpackssdw 8128(%rdx), %zmm27, %zmm24 + +// CHECK: vpackssdw 8192(%rdx), %zmm27, %zmm24 +// CHECK: encoding: [0x62,0x61,0x25,0x40,0x6b,0x82,0x00,0x20,0x00,0x00] + vpackssdw 8192(%rdx), %zmm27, %zmm24 + +// CHECK: vpackssdw -8192(%rdx), %zmm27, %zmm24 +// CHECK: encoding: [0x62,0x61,0x25,0x40,0x6b,0x42,0x80] + vpackssdw -8192(%rdx), %zmm27, %zmm24 + +// CHECK: vpackssdw -8256(%rdx), %zmm27, %zmm24 +// CHECK: encoding: [0x62,0x61,0x25,0x40,0x6b,0x82,0xc0,0xdf,0xff,0xff] + vpackssdw -8256(%rdx), %zmm27, %zmm24 + +// CHECK: vpackssdw 508(%rdx){1to16}, %zmm27, %zmm24 +// CHECK: encoding: [0x62,0x61,0x25,0x50,0x6b,0x42,0x7f] + vpackssdw 508(%rdx){1to16}, %zmm27, %zmm24 + +// CHECK: vpackssdw 512(%rdx){1to16}, %zmm27, %zmm24 +// CHECK: encoding: [0x62,0x61,0x25,0x50,0x6b,0x82,0x00,0x02,0x00,0x00] + vpackssdw 512(%rdx){1to16}, %zmm27, %zmm24 + +// CHECK: vpackssdw -512(%rdx){1to16}, %zmm27, %zmm24 +// CHECK: encoding: [0x62,0x61,0x25,0x50,0x6b,0x42,0x80] + vpackssdw -512(%rdx){1to16}, %zmm27, %zmm24 + +// CHECK: vpackssdw -516(%rdx){1to16}, %zmm27, %zmm24 +// CHECK: encoding: [0x62,0x61,0x25,0x50,0x6b,0x82,0xfc,0xfd,0xff,0xff] + vpackssdw -516(%rdx){1to16}, %zmm27, %zmm24 + +// CHECK: vpacksswb %zmm23, %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x21,0xed,0x40,0x63,0xe7] + vpacksswb %zmm23, %zmm18, %zmm28 + +// CHECK: vpacksswb %zmm23, %zmm18, %zmm28 {%k3} +// CHECK: encoding: [0x62,0x21,0xed,0x43,0x63,0xe7] + vpacksswb %zmm23, %zmm18, %zmm28 {%k3} + +// CHECK: vpacksswb %zmm23, %zmm18, %zmm28 {%k3} {z} +// CHECK: encoding: [0x62,0x21,0xed,0xc3,0x63,0xe7] + vpacksswb %zmm23, %zmm18, %zmm28 {%k3} {z} + +// CHECK: vpacksswb (%rcx), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x61,0xed,0x40,0x63,0x21] + vpacksswb (%rcx), %zmm18, %zmm28 + +// CHECK: vpacksswb 291(%rax,%r14,8), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x21,0xed,0x40,0x63,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpacksswb 291(%rax,%r14,8), %zmm18, %zmm28 + +// CHECK: vpacksswb 8128(%rdx), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x61,0xed,0x40,0x63,0x62,0x7f] + vpacksswb 8128(%rdx), %zmm18, %zmm28 + +// CHECK: vpacksswb 8192(%rdx), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x61,0xed,0x40,0x63,0xa2,0x00,0x20,0x00,0x00] + vpacksswb 8192(%rdx), %zmm18, %zmm28 + +// CHECK: vpacksswb -8192(%rdx), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x61,0xed,0x40,0x63,0x62,0x80] + vpacksswb -8192(%rdx), %zmm18, %zmm28 + +// CHECK: vpacksswb -8256(%rdx), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x61,0xed,0x40,0x63,0xa2,0xc0,0xdf,0xff,0xff] + vpacksswb -8256(%rdx), %zmm18, %zmm28 + +// CHECK: vpackusdw %zmm21, %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xa2,0x15,0x40,0x2b,0xd5] + vpackusdw %zmm21, %zmm29, %zmm18 + +// CHECK: vpackusdw %zmm21, %zmm29, %zmm18 {%k2} +// CHECK: encoding: [0x62,0xa2,0x15,0x42,0x2b,0xd5] + vpackusdw %zmm21, %zmm29, %zmm18 {%k2} + +// CHECK: vpackusdw %zmm21, %zmm29, %zmm18 {%k2} {z} +// CHECK: encoding: [0x62,0xa2,0x15,0xc2,0x2b,0xd5] + vpackusdw %zmm21, %zmm29, %zmm18 {%k2} {z} + +// CHECK: vpackusdw (%rcx), %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe2,0x15,0x40,0x2b,0x11] + vpackusdw (%rcx), %zmm29, %zmm18 + +// CHECK: vpackusdw 291(%rax,%r14,8), %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xa2,0x15,0x40,0x2b,0x94,0xf0,0x23,0x01,0x00,0x00] + vpackusdw 291(%rax,%r14,8), %zmm29, %zmm18 + +// CHECK: vpackusdw (%rcx){1to16}, %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe2,0x15,0x50,0x2b,0x11] + vpackusdw (%rcx){1to16}, %zmm29, %zmm18 + +// CHECK: vpackusdw 8128(%rdx), %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe2,0x15,0x40,0x2b,0x52,0x7f] + vpackusdw 8128(%rdx), %zmm29, %zmm18 + +// CHECK: vpackusdw 8192(%rdx), %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe2,0x15,0x40,0x2b,0x92,0x00,0x20,0x00,0x00] + vpackusdw 8192(%rdx), %zmm29, %zmm18 + +// CHECK: vpackusdw -8192(%rdx), %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe2,0x15,0x40,0x2b,0x52,0x80] + vpackusdw -8192(%rdx), %zmm29, %zmm18 + +// CHECK: vpackusdw -8256(%rdx), %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe2,0x15,0x40,0x2b,0x92,0xc0,0xdf,0xff,0xff] + vpackusdw -8256(%rdx), %zmm29, %zmm18 + +// CHECK: vpackusdw 508(%rdx){1to16}, %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe2,0x15,0x50,0x2b,0x52,0x7f] + vpackusdw 508(%rdx){1to16}, %zmm29, %zmm18 + +// CHECK: vpackusdw 512(%rdx){1to16}, %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe2,0x15,0x50,0x2b,0x92,0x00,0x02,0x00,0x00] + vpackusdw 512(%rdx){1to16}, %zmm29, %zmm18 + +// CHECK: vpackusdw -512(%rdx){1to16}, %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe2,0x15,0x50,0x2b,0x52,0x80] + vpackusdw -512(%rdx){1to16}, %zmm29, %zmm18 + +// CHECK: vpackusdw -516(%rdx){1to16}, %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe2,0x15,0x50,0x2b,0x92,0xfc,0xfd,0xff,0xff] + vpackusdw -516(%rdx){1to16}, %zmm29, %zmm18 + +// CHECK: vpackuswb %zmm27, %zmm18, %zmm23 +// CHECK: encoding: [0x62,0x81,0xed,0x40,0x67,0xfb] + vpackuswb %zmm27, %zmm18, %zmm23 + +// CHECK: vpackuswb %zmm27, %zmm18, %zmm23 {%k5} +// CHECK: encoding: [0x62,0x81,0xed,0x45,0x67,0xfb] + vpackuswb %zmm27, %zmm18, %zmm23 {%k5} + +// CHECK: vpackuswb %zmm27, %zmm18, %zmm23 {%k5} {z} +// CHECK: encoding: [0x62,0x81,0xed,0xc5,0x67,0xfb] + vpackuswb %zmm27, %zmm18, %zmm23 {%k5} {z} + +// CHECK: vpackuswb (%rcx), %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xe1,0xed,0x40,0x67,0x39] + vpackuswb (%rcx), %zmm18, %zmm23 + +// CHECK: vpackuswb 291(%rax,%r14,8), %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xa1,0xed,0x40,0x67,0xbc,0xf0,0x23,0x01,0x00,0x00] + vpackuswb 291(%rax,%r14,8), %zmm18, %zmm23 + +// CHECK: vpackuswb 8128(%rdx), %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xe1,0xed,0x40,0x67,0x7a,0x7f] + vpackuswb 8128(%rdx), %zmm18, %zmm23 + +// CHECK: vpackuswb 8192(%rdx), %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xe1,0xed,0x40,0x67,0xba,0x00,0x20,0x00,0x00] + vpackuswb 8192(%rdx), %zmm18, %zmm23 + +// CHECK: vpackuswb -8192(%rdx), %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xe1,0xed,0x40,0x67,0x7a,0x80] + vpackuswb -8192(%rdx), %zmm18, %zmm23 + +// CHECK: vpackuswb -8256(%rdx), %zmm18, %zmm23 +// CHECK: encoding: [0x62,0xe1,0xed,0x40,0x67,0xba,0xc0,0xdf,0xff,0xff] + vpackuswb -8256(%rdx), %zmm18, %zmm23 diff --git a/test/MC/X86/x86-64-avx512bw_vl.s b/test/MC/X86/x86-64-avx512bw_vl.s index 3f455827166..b3883c8d178 100644 --- a/test/MC/X86/x86-64-avx512bw_vl.s +++ b/test/MC/X86/x86-64-avx512bw_vl.s @@ -2303,3 +2303,1027 @@ // CHECK: vpsravw -4128(%rdx), %ymm28, %ymm20 // CHECK: encoding: [0x62,0xe2,0x9d,0x20,0x11,0xa2,0xe0,0xef,0xff,0xff] vpsravw -4128(%rdx), %ymm28, %ymm20 + +// CHECK: vpacksswb %xmm20, %xmm29, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x95,0x00,0x63,0xcc] + vpacksswb %xmm20, %xmm29, %xmm17 + +// CHECK: vpacksswb %xmm20, %xmm29, %xmm17 {%k3} +// CHECK: encoding: [0x62,0xa1,0x95,0x03,0x63,0xcc] + vpacksswb %xmm20, %xmm29, %xmm17 {%k3} + +// CHECK: vpacksswb %xmm20, %xmm29, %xmm17 {%k3} {z} +// CHECK: encoding: [0x62,0xa1,0x95,0x83,0x63,0xcc] + vpacksswb %xmm20, %xmm29, %xmm17 {%k3} {z} + +// CHECK: vpacksswb (%rcx), %xmm29, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x63,0x09] + vpacksswb (%rcx), %xmm29, %xmm17 + +// CHECK: vpacksswb 291(%rax,%r14,8), %xmm29, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x95,0x00,0x63,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpacksswb 291(%rax,%r14,8), %xmm29, %xmm17 + +// CHECK: vpacksswb 2032(%rdx), %xmm29, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x63,0x4a,0x7f] + vpacksswb 2032(%rdx), %xmm29, %xmm17 + +// CHECK: vpacksswb 2048(%rdx), %xmm29, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x63,0x8a,0x00,0x08,0x00,0x00] + vpacksswb 2048(%rdx), %xmm29, %xmm17 + +// CHECK: vpacksswb -2048(%rdx), %xmm29, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x63,0x4a,0x80] + vpacksswb -2048(%rdx), %xmm29, %xmm17 + +// CHECK: vpacksswb -2064(%rdx), %xmm29, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x95,0x00,0x63,0x8a,0xf0,0xf7,0xff,0xff] + vpacksswb -2064(%rdx), %xmm29, %xmm17 + +// CHECK: vpacksswb %ymm21, %ymm26, %ymm29 +// CHECK: encoding: [0x62,0x21,0xad,0x20,0x63,0xed] + vpacksswb %ymm21, %ymm26, %ymm29 + +// CHECK: vpacksswb %ymm21, %ymm26, %ymm29 {%k2} +// CHECK: encoding: [0x62,0x21,0xad,0x22,0x63,0xed] + vpacksswb %ymm21, %ymm26, %ymm29 {%k2} + +// CHECK: vpacksswb %ymm21, %ymm26, %ymm29 {%k2} {z} +// CHECK: encoding: [0x62,0x21,0xad,0xa2,0x63,0xed] + vpacksswb %ymm21, %ymm26, %ymm29 {%k2} {z} + +// CHECK: vpacksswb (%rcx), %ymm26, %ymm29 +// CHECK: encoding: [0x62,0x61,0xad,0x20,0x63,0x29] + vpacksswb (%rcx), %ymm26, %ymm29 + +// CHECK: vpacksswb 291(%rax,%r14,8), %ymm26, %ymm29 +// CHECK: encoding: [0x62,0x21,0xad,0x20,0x63,0xac,0xf0,0x23,0x01,0x00,0x00] + vpacksswb 291(%rax,%r14,8), %ymm26, %ymm29 + +// CHECK: vpacksswb 4064(%rdx), %ymm26, %ymm29 +// CHECK: encoding: [0x62,0x61,0xad,0x20,0x63,0x6a,0x7f] + vpacksswb 4064(%rdx), %ymm26, %ymm29 + +// CHECK: vpacksswb 4096(%rdx), %ymm26, %ymm29 +// CHECK: encoding: [0x62,0x61,0xad,0x20,0x63,0xaa,0x00,0x10,0x00,0x00] + vpacksswb 4096(%rdx), %ymm26, %ymm29 + +// CHECK: vpacksswb -4096(%rdx), %ymm26, %ymm29 +// CHECK: encoding: [0x62,0x61,0xad,0x20,0x63,0x6a,0x80] + vpacksswb -4096(%rdx), %ymm26, %ymm29 + +// CHECK: vpacksswb -4128(%rdx), %ymm26, %ymm29 +// CHECK: encoding: [0x62,0x61,0xad,0x20,0x63,0xaa,0xe0,0xef,0xff,0xff] + vpacksswb -4128(%rdx), %ymm26, %ymm29 + +// CHECK: vpackuswb %xmm24, %xmm26, %xmm20 +// CHECK: encoding: [0x62,0x81,0xad,0x00,0x67,0xe0] + vpackuswb %xmm24, %xmm26, %xmm20 + +// CHECK: vpackuswb %xmm24, %xmm26, %xmm20 {%k4} +// CHECK: encoding: [0x62,0x81,0xad,0x04,0x67,0xe0] + vpackuswb %xmm24, %xmm26, %xmm20 {%k4} + +// CHECK: vpackuswb %xmm24, %xmm26, %xmm20 {%k4} {z} +// CHECK: encoding: [0x62,0x81,0xad,0x84,0x67,0xe0] + vpackuswb %xmm24, %xmm26, %xmm20 {%k4} {z} + +// CHECK: vpackuswb (%rcx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x00,0x67,0x21] + vpackuswb (%rcx), %xmm26, %xmm20 + +// CHECK: vpackuswb 291(%rax,%r14,8), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xa1,0xad,0x00,0x67,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpackuswb 291(%rax,%r14,8), %xmm26, %xmm20 + +// CHECK: vpackuswb 2032(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x00,0x67,0x62,0x7f] + vpackuswb 2032(%rdx), %xmm26, %xmm20 + +// CHECK: vpackuswb 2048(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x00,0x67,0xa2,0x00,0x08,0x00,0x00] + vpackuswb 2048(%rdx), %xmm26, %xmm20 + +// CHECK: vpackuswb -2048(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x00,0x67,0x62,0x80] + vpackuswb -2048(%rdx), %xmm26, %xmm20 + +// CHECK: vpackuswb -2064(%rdx), %xmm26, %xmm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x00,0x67,0xa2,0xf0,0xf7,0xff,0xff] + vpackuswb -2064(%rdx), %xmm26, %xmm20 + +// CHECK: vpackuswb %ymm20, %ymm28, %ymm19 +// CHECK: encoding: [0x62,0xa1,0x9d,0x20,0x67,0xdc] + vpackuswb %ymm20, %ymm28, %ymm19 + +// CHECK: vpackuswb %ymm20, %ymm28, %ymm19 {%k2} +// CHECK: encoding: [0x62,0xa1,0x9d,0x22,0x67,0xdc] + vpackuswb %ymm20, %ymm28, %ymm19 {%k2} + +// CHECK: vpackuswb %ymm20, %ymm28, %ymm19 {%k2} {z} +// CHECK: encoding: [0x62,0xa1,0x9d,0xa2,0x67,0xdc] + vpackuswb %ymm20, %ymm28, %ymm19 {%k2} {z} + +// CHECK: vpackuswb (%rcx), %ymm28, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x67,0x19] + vpackuswb (%rcx), %ymm28, %ymm19 + +// CHECK: vpackuswb 291(%rax,%r14,8), %ymm28, %ymm19 +// CHECK: encoding: [0x62,0xa1,0x9d,0x20,0x67,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpackuswb 291(%rax,%r14,8), %ymm28, %ymm19 + +// CHECK: vpackuswb 4064(%rdx), %ymm28, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x67,0x5a,0x7f] + vpackuswb 4064(%rdx), %ymm28, %ymm19 + +// CHECK: vpackuswb 4096(%rdx), %ymm28, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x67,0x9a,0x00,0x10,0x00,0x00] + vpackuswb 4096(%rdx), %ymm28, %ymm19 + +// CHECK: vpackuswb -4096(%rdx), %ymm28, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x67,0x5a,0x80] + vpackuswb -4096(%rdx), %ymm28, %ymm19 + +// CHECK: vpackuswb -4128(%rdx), %ymm28, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x67,0x9a,0xe0,0xef,0xff,0xff] + vpackuswb -4128(%rdx), %ymm28, %ymm19 + +// CHECK: vpacksswb %xmm25, %xmm18, %xmm19 +// CHECK: encoding: [0x62,0x81,0xed,0x00,0x63,0xd9] + vpacksswb %xmm25, %xmm18, %xmm19 + +// CHECK: vpacksswb %xmm25, %xmm18, %xmm19 {%k4} +// CHECK: encoding: [0x62,0x81,0xed,0x04,0x63,0xd9] + vpacksswb %xmm25, %xmm18, %xmm19 {%k4} + +// CHECK: vpacksswb %xmm25, %xmm18, %xmm19 {%k4} {z} +// CHECK: encoding: [0x62,0x81,0xed,0x84,0x63,0xd9] + vpacksswb %xmm25, %xmm18, %xmm19 {%k4} {z} + +// CHECK: vpacksswb (%rcx), %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x63,0x19] + vpacksswb (%rcx), %xmm18, %xmm19 + +// CHECK: vpacksswb 4660(%rax,%r14,8), %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xa1,0xed,0x00,0x63,0x9c,0xf0,0x34,0x12,0x00,0x00] + vpacksswb 4660(%rax,%r14,8), %xmm18, %xmm19 + +// CHECK: vpacksswb 2032(%rdx), %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x63,0x5a,0x7f] + vpacksswb 2032(%rdx), %xmm18, %xmm19 + +// CHECK: vpacksswb 2048(%rdx), %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x63,0x9a,0x00,0x08,0x00,0x00] + vpacksswb 2048(%rdx), %xmm18, %xmm19 + +// CHECK: vpacksswb -2048(%rdx), %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x63,0x5a,0x80] + vpacksswb -2048(%rdx), %xmm18, %xmm19 + +// CHECK: vpacksswb -2064(%rdx), %xmm18, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xed,0x00,0x63,0x9a,0xf0,0xf7,0xff,0xff] + vpacksswb -2064(%rdx), %xmm18, %xmm19 + +// CHECK: vpacksswb %ymm19, %ymm22, %ymm22 +// CHECK: encoding: [0x62,0xa1,0xcd,0x20,0x63,0xf3] + vpacksswb %ymm19, %ymm22, %ymm22 + +// CHECK: vpacksswb %ymm19, %ymm22, %ymm22 {%k7} +// CHECK: encoding: [0x62,0xa1,0xcd,0x27,0x63,0xf3] + vpacksswb %ymm19, %ymm22, %ymm22 {%k7} + +// CHECK: vpacksswb %ymm19, %ymm22, %ymm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa1,0xcd,0xa7,0x63,0xf3] + vpacksswb %ymm19, %ymm22, %ymm22 {%k7} {z} + +// CHECK: vpacksswb (%rcx), %ymm22, %ymm22 +// CHECK: encoding: [0x62,0xe1,0xcd,0x20,0x63,0x31] + vpacksswb (%rcx), %ymm22, %ymm22 + +// CHECK: vpacksswb 4660(%rax,%r14,8), %ymm22, %ymm22 +// CHECK: encoding: [0x62,0xa1,0xcd,0x20,0x63,0xb4,0xf0,0x34,0x12,0x00,0x00] + vpacksswb 4660(%rax,%r14,8), %ymm22, %ymm22 + +// CHECK: vpacksswb 4064(%rdx), %ymm22, %ymm22 +// CHECK: encoding: [0x62,0xe1,0xcd,0x20,0x63,0x72,0x7f] + vpacksswb 4064(%rdx), %ymm22, %ymm22 + +// CHECK: vpacksswb 4096(%rdx), %ymm22, %ymm22 +// CHECK: encoding: [0x62,0xe1,0xcd,0x20,0x63,0xb2,0x00,0x10,0x00,0x00] + vpacksswb 4096(%rdx), %ymm22, %ymm22 + +// CHECK: vpacksswb -4096(%rdx), %ymm22, %ymm22 +// CHECK: encoding: [0x62,0xe1,0xcd,0x20,0x63,0x72,0x80] + vpacksswb -4096(%rdx), %ymm22, %ymm22 + +// CHECK: vpacksswb -4128(%rdx), %ymm22, %ymm22 +// CHECK: encoding: [0x62,0xe1,0xcd,0x20,0x63,0xb2,0xe0,0xef,0xff,0xff] + vpacksswb -4128(%rdx), %ymm22, %ymm22 + +// CHECK: vpackuswb %xmm23, %xmm28, %xmm23 +// CHECK: encoding: [0x62,0xa1,0x9d,0x00,0x67,0xff] + vpackuswb %xmm23, %xmm28, %xmm23 + +// CHECK: vpackuswb %xmm23, %xmm28, %xmm23 {%k3} +// CHECK: encoding: [0x62,0xa1,0x9d,0x03,0x67,0xff] + vpackuswb %xmm23, %xmm28, %xmm23 {%k3} + +// CHECK: vpackuswb %xmm23, %xmm28, %xmm23 {%k3} {z} +// CHECK: encoding: [0x62,0xa1,0x9d,0x83,0x67,0xff] + vpackuswb %xmm23, %xmm28, %xmm23 {%k3} {z} + +// CHECK: vpackuswb (%rcx), %xmm28, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x67,0x39] + vpackuswb (%rcx), %xmm28, %xmm23 + +// CHECK: vpackuswb 4660(%rax,%r14,8), %xmm28, %xmm23 +// CHECK: encoding: [0x62,0xa1,0x9d,0x00,0x67,0xbc,0xf0,0x34,0x12,0x00,0x00] + vpackuswb 4660(%rax,%r14,8), %xmm28, %xmm23 + +// CHECK: vpackuswb 2032(%rdx), %xmm28, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x67,0x7a,0x7f] + vpackuswb 2032(%rdx), %xmm28, %xmm23 + +// CHECK: vpackuswb 2048(%rdx), %xmm28, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x67,0xba,0x00,0x08,0x00,0x00] + vpackuswb 2048(%rdx), %xmm28, %xmm23 + +// CHECK: vpackuswb -2048(%rdx), %xmm28, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x67,0x7a,0x80] + vpackuswb -2048(%rdx), %xmm28, %xmm23 + +// CHECK: vpackuswb -2064(%rdx), %xmm28, %xmm23 +// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x67,0xba,0xf0,0xf7,0xff,0xff] + vpackuswb -2064(%rdx), %xmm28, %xmm23 + +// CHECK: vpackuswb %ymm27, %ymm26, %ymm20 +// CHECK: encoding: [0x62,0x81,0xad,0x20,0x67,0xe3] + vpackuswb %ymm27, %ymm26, %ymm20 + +// CHECK: vpackuswb %ymm27, %ymm26, %ymm20 {%k6} +// CHECK: encoding: [0x62,0x81,0xad,0x26,0x67,0xe3] + vpackuswb %ymm27, %ymm26, %ymm20 {%k6} + +// CHECK: vpackuswb %ymm27, %ymm26, %ymm20 {%k6} {z} +// CHECK: encoding: [0x62,0x81,0xad,0xa6,0x67,0xe3] + vpackuswb %ymm27, %ymm26, %ymm20 {%k6} {z} + +// CHECK: vpackuswb (%rcx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x67,0x21] + vpackuswb (%rcx), %ymm26, %ymm20 + +// CHECK: vpackuswb 4660(%rax,%r14,8), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xa1,0xad,0x20,0x67,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpackuswb 4660(%rax,%r14,8), %ymm26, %ymm20 + +// CHECK: vpackuswb 4064(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x67,0x62,0x7f] + vpackuswb 4064(%rdx), %ymm26, %ymm20 + +// CHECK: vpackuswb 4096(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x67,0xa2,0x00,0x10,0x00,0x00] + vpackuswb 4096(%rdx), %ymm26, %ymm20 + +// CHECK: vpackuswb -4096(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x67,0x62,0x80] + vpackuswb -4096(%rdx), %ymm26, %ymm20 + +// CHECK: vpackuswb -4128(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x67,0xa2,0xe0,0xef,0xff,0xff] + vpackuswb -4128(%rdx), %ymm26, %ymm20 + +// CHECK: vpackssdw %xmm26, %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x01,0x2d,0x00,0x6b,0xf2] + vpackssdw %xmm26, %xmm26, %xmm30 + +// CHECK: vpackssdw %xmm26, %xmm26, %xmm30 {%k2} +// CHECK: encoding: [0x62,0x01,0x2d,0x02,0x6b,0xf2] + vpackssdw %xmm26, %xmm26, %xmm30 {%k2} + +// CHECK: vpackssdw %xmm26, %xmm26, %xmm30 {%k2} {z} +// CHECK: encoding: [0x62,0x01,0x2d,0x82,0x6b,0xf2] + vpackssdw %xmm26, %xmm26, %xmm30 {%k2} {z} + +// CHECK: vpackssdw (%rcx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x00,0x6b,0x31] + vpackssdw (%rcx), %xmm26, %xmm30 + +// CHECK: vpackssdw 291(%rax,%r14,8), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x21,0x2d,0x00,0x6b,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpackssdw 291(%rax,%r14,8), %xmm26, %xmm30 + +// CHECK: vpackssdw (%rcx){1to4}, %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x10,0x6b,0x31] + vpackssdw (%rcx){1to4}, %xmm26, %xmm30 + +// CHECK: vpackssdw 2032(%rdx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x00,0x6b,0x72,0x7f] + vpackssdw 2032(%rdx), %xmm26, %xmm30 + +// CHECK: vpackssdw 2048(%rdx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x00,0x6b,0xb2,0x00,0x08,0x00,0x00] + vpackssdw 2048(%rdx), %xmm26, %xmm30 + +// CHECK: vpackssdw -2048(%rdx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x00,0x6b,0x72,0x80] + vpackssdw -2048(%rdx), %xmm26, %xmm30 + +// CHECK: vpackssdw -2064(%rdx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x00,0x6b,0xb2,0xf0,0xf7,0xff,0xff] + vpackssdw -2064(%rdx), %xmm26, %xmm30 + +// CHECK: vpackssdw 508(%rdx){1to4}, %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x10,0x6b,0x72,0x7f] + vpackssdw 508(%rdx){1to4}, %xmm26, %xmm30 + +// CHECK: vpackssdw 512(%rdx){1to4}, %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x10,0x6b,0xb2,0x00,0x02,0x00,0x00] + vpackssdw 512(%rdx){1to4}, %xmm26, %xmm30 + +// CHECK: vpackssdw -512(%rdx){1to4}, %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x10,0x6b,0x72,0x80] + vpackssdw -512(%rdx){1to4}, %xmm26, %xmm30 + +// CHECK: vpackssdw -516(%rdx){1to4}, %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x10,0x6b,0xb2,0xfc,0xfd,0xff,0xff] + vpackssdw -516(%rdx){1to4}, %xmm26, %xmm30 + +// CHECK: vpackssdw %ymm26, %ymm22, %ymm28 +// CHECK: encoding: [0x62,0x01,0x4d,0x20,0x6b,0xe2] + vpackssdw %ymm26, %ymm22, %ymm28 + +// CHECK: vpackssdw %ymm26, %ymm22, %ymm28 {%k7} +// CHECK: encoding: [0x62,0x01,0x4d,0x27,0x6b,0xe2] + vpackssdw %ymm26, %ymm22, %ymm28 {%k7} + +// CHECK: vpackssdw %ymm26, %ymm22, %ymm28 {%k7} {z} +// CHECK: encoding: [0x62,0x01,0x4d,0xa7,0x6b,0xe2] + vpackssdw %ymm26, %ymm22, %ymm28 {%k7} {z} + +// CHECK: vpackssdw (%rcx), %ymm22, %ymm28 +// CHECK: encoding: [0x62,0x61,0x4d,0x20,0x6b,0x21] + vpackssdw (%rcx), %ymm22, %ymm28 + +// CHECK: vpackssdw 291(%rax,%r14,8), %ymm22, %ymm28 +// CHECK: encoding: [0x62,0x21,0x4d,0x20,0x6b,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpackssdw 291(%rax,%r14,8), %ymm22, %ymm28 + +// CHECK: vpackssdw (%rcx){1to8}, %ymm22, %ymm28 +// CHECK: encoding: [0x62,0x61,0x4d,0x30,0x6b,0x21] + vpackssdw (%rcx){1to8}, %ymm22, %ymm28 + +// CHECK: vpackssdw 4064(%rdx), %ymm22, %ymm28 +// CHECK: encoding: [0x62,0x61,0x4d,0x20,0x6b,0x62,0x7f] + vpackssdw 4064(%rdx), %ymm22, %ymm28 + +// CHECK: vpackssdw 4096(%rdx), %ymm22, %ymm28 +// CHECK: encoding: [0x62,0x61,0x4d,0x20,0x6b,0xa2,0x00,0x10,0x00,0x00] + vpackssdw 4096(%rdx), %ymm22, %ymm28 + +// CHECK: vpackssdw -4096(%rdx), %ymm22, %ymm28 +// CHECK: encoding: [0x62,0x61,0x4d,0x20,0x6b,0x62,0x80] + vpackssdw -4096(%rdx), %ymm22, %ymm28 + +// CHECK: vpackssdw -4128(%rdx), %ymm22, %ymm28 +// CHECK: encoding: [0x62,0x61,0x4d,0x20,0x6b,0xa2,0xe0,0xef,0xff,0xff] + vpackssdw -4128(%rdx), %ymm22, %ymm28 + +// CHECK: vpackssdw 508(%rdx){1to8}, %ymm22, %ymm28 +// CHECK: encoding: [0x62,0x61,0x4d,0x30,0x6b,0x62,0x7f] + vpackssdw 508(%rdx){1to8}, %ymm22, %ymm28 + +// CHECK: vpackssdw 512(%rdx){1to8}, %ymm22, %ymm28 +// CHECK: encoding: [0x62,0x61,0x4d,0x30,0x6b,0xa2,0x00,0x02,0x00,0x00] + vpackssdw 512(%rdx){1to8}, %ymm22, %ymm28 + +// CHECK: vpackssdw -512(%rdx){1to8}, %ymm22, %ymm28 +// CHECK: encoding: [0x62,0x61,0x4d,0x30,0x6b,0x62,0x80] + vpackssdw -512(%rdx){1to8}, %ymm22, %ymm28 + +// CHECK: vpackssdw -516(%rdx){1to8}, %ymm22, %ymm28 +// CHECK: encoding: [0x62,0x61,0x4d,0x30,0x6b,0xa2,0xfc,0xfd,0xff,0xff] + vpackssdw -516(%rdx){1to8}, %ymm22, %ymm28 + +// CHECK: vpacksswb %xmm21, %xmm28, %xmm25 +// CHECK: encoding: [0x62,0x21,0x9d,0x00,0x63,0xcd] + vpacksswb %xmm21, %xmm28, %xmm25 + +// CHECK: vpacksswb %xmm21, %xmm28, %xmm25 {%k5} +// CHECK: encoding: [0x62,0x21,0x9d,0x05,0x63,0xcd] + vpacksswb %xmm21, %xmm28, %xmm25 {%k5} + +// CHECK: vpacksswb %xmm21, %xmm28, %xmm25 {%k5} {z} +// CHECK: encoding: [0x62,0x21,0x9d,0x85,0x63,0xcd] + vpacksswb %xmm21, %xmm28, %xmm25 {%k5} {z} + +// CHECK: vpacksswb (%rcx), %xmm28, %xmm25 +// CHECK: encoding: [0x62,0x61,0x9d,0x00,0x63,0x09] + vpacksswb (%rcx), %xmm28, %xmm25 + +// CHECK: vpacksswb 291(%rax,%r14,8), %xmm28, %xmm25 +// CHECK: encoding: [0x62,0x21,0x9d,0x00,0x63,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpacksswb 291(%rax,%r14,8), %xmm28, %xmm25 + +// CHECK: vpacksswb 2032(%rdx), %xmm28, %xmm25 +// CHECK: encoding: [0x62,0x61,0x9d,0x00,0x63,0x4a,0x7f] + vpacksswb 2032(%rdx), %xmm28, %xmm25 + +// CHECK: vpacksswb 2048(%rdx), %xmm28, %xmm25 +// CHECK: encoding: [0x62,0x61,0x9d,0x00,0x63,0x8a,0x00,0x08,0x00,0x00] + vpacksswb 2048(%rdx), %xmm28, %xmm25 + +// CHECK: vpacksswb -2048(%rdx), %xmm28, %xmm25 +// CHECK: encoding: [0x62,0x61,0x9d,0x00,0x63,0x4a,0x80] + vpacksswb -2048(%rdx), %xmm28, %xmm25 + +// CHECK: vpacksswb -2064(%rdx), %xmm28, %xmm25 +// CHECK: encoding: [0x62,0x61,0x9d,0x00,0x63,0x8a,0xf0,0xf7,0xff,0xff] + vpacksswb -2064(%rdx), %xmm28, %xmm25 + +// CHECK: vpacksswb %ymm22, %ymm28, %ymm21 +// CHECK: encoding: [0x62,0xa1,0x9d,0x20,0x63,0xee] + vpacksswb %ymm22, %ymm28, %ymm21 + +// CHECK: vpacksswb %ymm22, %ymm28, %ymm21 {%k5} +// CHECK: encoding: [0x62,0xa1,0x9d,0x25,0x63,0xee] + vpacksswb %ymm22, %ymm28, %ymm21 {%k5} + +// CHECK: vpacksswb %ymm22, %ymm28, %ymm21 {%k5} {z} +// CHECK: encoding: [0x62,0xa1,0x9d,0xa5,0x63,0xee] + vpacksswb %ymm22, %ymm28, %ymm21 {%k5} {z} + +// CHECK: vpacksswb (%rcx), %ymm28, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x63,0x29] + vpacksswb (%rcx), %ymm28, %ymm21 + +// CHECK: vpacksswb 291(%rax,%r14,8), %ymm28, %ymm21 +// CHECK: encoding: [0x62,0xa1,0x9d,0x20,0x63,0xac,0xf0,0x23,0x01,0x00,0x00] + vpacksswb 291(%rax,%r14,8), %ymm28, %ymm21 + +// CHECK: vpacksswb 4064(%rdx), %ymm28, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x63,0x6a,0x7f] + vpacksswb 4064(%rdx), %ymm28, %ymm21 + +// CHECK: vpacksswb 4096(%rdx), %ymm28, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x63,0xaa,0x00,0x10,0x00,0x00] + vpacksswb 4096(%rdx), %ymm28, %ymm21 + +// CHECK: vpacksswb -4096(%rdx), %ymm28, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x63,0x6a,0x80] + vpacksswb -4096(%rdx), %ymm28, %ymm21 + +// CHECK: vpacksswb -4128(%rdx), %ymm28, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x63,0xaa,0xe0,0xef,0xff,0xff] + vpacksswb -4128(%rdx), %ymm28, %ymm21 + +// CHECK: vpackusdw %xmm20, %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x22,0x3d,0x00,0x2b,0xc4] + vpackusdw %xmm20, %xmm24, %xmm24 + +// CHECK: vpackusdw %xmm20, %xmm24, %xmm24 {%k5} +// CHECK: encoding: [0x62,0x22,0x3d,0x05,0x2b,0xc4] + vpackusdw %xmm20, %xmm24, %xmm24 {%k5} + +// CHECK: vpackusdw %xmm20, %xmm24, %xmm24 {%k5} {z} +// CHECK: encoding: [0x62,0x22,0x3d,0x85,0x2b,0xc4] + vpackusdw %xmm20, %xmm24, %xmm24 {%k5} {z} + +// CHECK: vpackusdw (%rcx), %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x00,0x2b,0x01] + vpackusdw (%rcx), %xmm24, %xmm24 + +// CHECK: vpackusdw 291(%rax,%r14,8), %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x22,0x3d,0x00,0x2b,0x84,0xf0,0x23,0x01,0x00,0x00] + vpackusdw 291(%rax,%r14,8), %xmm24, %xmm24 + +// CHECK: vpackusdw (%rcx){1to4}, %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x10,0x2b,0x01] + vpackusdw (%rcx){1to4}, %xmm24, %xmm24 + +// CHECK: vpackusdw 2032(%rdx), %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x00,0x2b,0x42,0x7f] + vpackusdw 2032(%rdx), %xmm24, %xmm24 + +// CHECK: vpackusdw 2048(%rdx), %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x00,0x2b,0x82,0x00,0x08,0x00,0x00] + vpackusdw 2048(%rdx), %xmm24, %xmm24 + +// CHECK: vpackusdw -2048(%rdx), %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x00,0x2b,0x42,0x80] + vpackusdw -2048(%rdx), %xmm24, %xmm24 + +// CHECK: vpackusdw -2064(%rdx), %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x00,0x2b,0x82,0xf0,0xf7,0xff,0xff] + vpackusdw -2064(%rdx), %xmm24, %xmm24 + +// CHECK: vpackusdw 508(%rdx){1to4}, %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x10,0x2b,0x42,0x7f] + vpackusdw 508(%rdx){1to4}, %xmm24, %xmm24 + +// CHECK: vpackusdw 512(%rdx){1to4}, %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x10,0x2b,0x82,0x00,0x02,0x00,0x00] + vpackusdw 512(%rdx){1to4}, %xmm24, %xmm24 + +// CHECK: vpackusdw -512(%rdx){1to4}, %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x10,0x2b,0x42,0x80] + vpackusdw -512(%rdx){1to4}, %xmm24, %xmm24 + +// CHECK: vpackusdw -516(%rdx){1to4}, %xmm24, %xmm24 +// CHECK: encoding: [0x62,0x62,0x3d,0x10,0x2b,0x82,0xfc,0xfd,0xff,0xff] + vpackusdw -516(%rdx){1to4}, %xmm24, %xmm24 + +// CHECK: vpackusdw %ymm21, %ymm26, %ymm19 +// CHECK: encoding: [0x62,0xa2,0x2d,0x20,0x2b,0xdd] + vpackusdw %ymm21, %ymm26, %ymm19 + +// CHECK: vpackusdw %ymm21, %ymm26, %ymm19 {%k1} +// CHECK: encoding: [0x62,0xa2,0x2d,0x21,0x2b,0xdd] + vpackusdw %ymm21, %ymm26, %ymm19 {%k1} + +// CHECK: vpackusdw %ymm21, %ymm26, %ymm19 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0x2d,0xa1,0x2b,0xdd] + vpackusdw %ymm21, %ymm26, %ymm19 {%k1} {z} + +// CHECK: vpackusdw (%rcx), %ymm26, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0x2b,0x19] + vpackusdw (%rcx), %ymm26, %ymm19 + +// CHECK: vpackusdw 291(%rax,%r14,8), %ymm26, %ymm19 +// CHECK: encoding: [0x62,0xa2,0x2d,0x20,0x2b,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpackusdw 291(%rax,%r14,8), %ymm26, %ymm19 + +// CHECK: vpackusdw (%rcx){1to8}, %ymm26, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0x2b,0x19] + vpackusdw (%rcx){1to8}, %ymm26, %ymm19 + +// CHECK: vpackusdw 4064(%rdx), %ymm26, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0x2b,0x5a,0x7f] + vpackusdw 4064(%rdx), %ymm26, %ymm19 + +// CHECK: vpackusdw 4096(%rdx), %ymm26, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0x2b,0x9a,0x00,0x10,0x00,0x00] + vpackusdw 4096(%rdx), %ymm26, %ymm19 + +// CHECK: vpackusdw -4096(%rdx), %ymm26, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0x2b,0x5a,0x80] + vpackusdw -4096(%rdx), %ymm26, %ymm19 + +// CHECK: vpackusdw -4128(%rdx), %ymm26, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0x2b,0x9a,0xe0,0xef,0xff,0xff] + vpackusdw -4128(%rdx), %ymm26, %ymm19 + +// CHECK: vpackusdw 508(%rdx){1to8}, %ymm26, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0x2b,0x5a,0x7f] + vpackusdw 508(%rdx){1to8}, %ymm26, %ymm19 + +// CHECK: vpackusdw 512(%rdx){1to8}, %ymm26, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0x2b,0x9a,0x00,0x02,0x00,0x00] + vpackusdw 512(%rdx){1to8}, %ymm26, %ymm19 + +// CHECK: vpackusdw -512(%rdx){1to8}, %ymm26, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0x2b,0x5a,0x80] + vpackusdw -512(%rdx){1to8}, %ymm26, %ymm19 + +// CHECK: vpackusdw -516(%rdx){1to8}, %ymm26, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0x2b,0x9a,0xfc,0xfd,0xff,0xff] + vpackusdw -516(%rdx){1to8}, %ymm26, %ymm19 + +// CHECK: vpackuswb %xmm17, %xmm27, %xmm21 +// CHECK: encoding: [0x62,0xa1,0xa5,0x00,0x67,0xe9] + vpackuswb %xmm17, %xmm27, %xmm21 + +// CHECK: vpackuswb %xmm17, %xmm27, %xmm21 {%k4} +// CHECK: encoding: [0x62,0xa1,0xa5,0x04,0x67,0xe9] + vpackuswb %xmm17, %xmm27, %xmm21 {%k4} + +// CHECK: vpackuswb %xmm17, %xmm27, %xmm21 {%k4} {z} +// CHECK: encoding: [0x62,0xa1,0xa5,0x84,0x67,0xe9] + vpackuswb %xmm17, %xmm27, %xmm21 {%k4} {z} + +// CHECK: vpackuswb (%rcx), %xmm27, %xmm21 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x67,0x29] + vpackuswb (%rcx), %xmm27, %xmm21 + +// CHECK: vpackuswb 291(%rax,%r14,8), %xmm27, %xmm21 +// CHECK: encoding: [0x62,0xa1,0xa5,0x00,0x67,0xac,0xf0,0x23,0x01,0x00,0x00] + vpackuswb 291(%rax,%r14,8), %xmm27, %xmm21 + +// CHECK: vpackuswb 2032(%rdx), %xmm27, %xmm21 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x67,0x6a,0x7f] + vpackuswb 2032(%rdx), %xmm27, %xmm21 + +// CHECK: vpackuswb 2048(%rdx), %xmm27, %xmm21 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x67,0xaa,0x00,0x08,0x00,0x00] + vpackuswb 2048(%rdx), %xmm27, %xmm21 + +// CHECK: vpackuswb -2048(%rdx), %xmm27, %xmm21 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x67,0x6a,0x80] + vpackuswb -2048(%rdx), %xmm27, %xmm21 + +// CHECK: vpackuswb -2064(%rdx), %xmm27, %xmm21 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x67,0xaa,0xf0,0xf7,0xff,0xff] + vpackuswb -2064(%rdx), %xmm27, %xmm21 + +// CHECK: vpackuswb %ymm20, %ymm28, %ymm19 +// CHECK: encoding: [0x62,0xa1,0x9d,0x20,0x67,0xdc] + vpackuswb %ymm20, %ymm28, %ymm19 + +// CHECK: vpackuswb %ymm20, %ymm28, %ymm19 {%k1} +// CHECK: encoding: [0x62,0xa1,0x9d,0x21,0x67,0xdc] + vpackuswb %ymm20, %ymm28, %ymm19 {%k1} + +// CHECK: vpackuswb %ymm20, %ymm28, %ymm19 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0x9d,0xa1,0x67,0xdc] + vpackuswb %ymm20, %ymm28, %ymm19 {%k1} {z} + +// CHECK: vpackuswb (%rcx), %ymm28, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x67,0x19] + vpackuswb (%rcx), %ymm28, %ymm19 + +// CHECK: vpackuswb 291(%rax,%r14,8), %ymm28, %ymm19 +// CHECK: encoding: [0x62,0xa1,0x9d,0x20,0x67,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpackuswb 291(%rax,%r14,8), %ymm28, %ymm19 + +// CHECK: vpackuswb 4064(%rdx), %ymm28, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x67,0x5a,0x7f] + vpackuswb 4064(%rdx), %ymm28, %ymm19 + +// CHECK: vpackuswb 4096(%rdx), %ymm28, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x67,0x9a,0x00,0x10,0x00,0x00] + vpackuswb 4096(%rdx), %ymm28, %ymm19 + +// CHECK: vpackuswb -4096(%rdx), %ymm28, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x67,0x5a,0x80] + vpackuswb -4096(%rdx), %ymm28, %ymm19 + +// CHECK: vpackuswb -4128(%rdx), %ymm28, %ymm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x20,0x67,0x9a,0xe0,0xef,0xff,0xff] + vpackuswb -4128(%rdx), %ymm28, %ymm19 + +// CHECK: vpackssdw %xmm19, %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x21,0x75,0x00,0x6b,0xc3] + vpackssdw %xmm19, %xmm17, %xmm24 + +// CHECK: vpackssdw %xmm19, %xmm17, %xmm24 {%k7} +// CHECK: encoding: [0x62,0x21,0x75,0x07,0x6b,0xc3] + vpackssdw %xmm19, %xmm17, %xmm24 {%k7} + +// CHECK: vpackssdw %xmm19, %xmm17, %xmm24 {%k7} {z} +// CHECK: encoding: [0x62,0x21,0x75,0x87,0x6b,0xc3] + vpackssdw %xmm19, %xmm17, %xmm24 {%k7} {z} + +// CHECK: vpackssdw (%rcx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0x6b,0x01] + vpackssdw (%rcx), %xmm17, %xmm24 + +// CHECK: vpackssdw 4660(%rax,%r14,8), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x21,0x75,0x00,0x6b,0x84,0xf0,0x34,0x12,0x00,0x00] + vpackssdw 4660(%rax,%r14,8), %xmm17, %xmm24 + +// CHECK: vpackssdw (%rcx){1to4}, %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x10,0x6b,0x01] + vpackssdw (%rcx){1to4}, %xmm17, %xmm24 + +// CHECK: vpackssdw 2032(%rdx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0x6b,0x42,0x7f] + vpackssdw 2032(%rdx), %xmm17, %xmm24 + +// CHECK: vpackssdw 2048(%rdx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0x6b,0x82,0x00,0x08,0x00,0x00] + vpackssdw 2048(%rdx), %xmm17, %xmm24 + +// CHECK: vpackssdw -2048(%rdx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0x6b,0x42,0x80] + vpackssdw -2048(%rdx), %xmm17, %xmm24 + +// CHECK: vpackssdw -2064(%rdx), %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x00,0x6b,0x82,0xf0,0xf7,0xff,0xff] + vpackssdw -2064(%rdx), %xmm17, %xmm24 + +// CHECK: vpackssdw 508(%rdx){1to4}, %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x10,0x6b,0x42,0x7f] + vpackssdw 508(%rdx){1to4}, %xmm17, %xmm24 + +// CHECK: vpackssdw 512(%rdx){1to4}, %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x10,0x6b,0x82,0x00,0x02,0x00,0x00] + vpackssdw 512(%rdx){1to4}, %xmm17, %xmm24 + +// CHECK: vpackssdw -512(%rdx){1to4}, %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x10,0x6b,0x42,0x80] + vpackssdw -512(%rdx){1to4}, %xmm17, %xmm24 + +// CHECK: vpackssdw -516(%rdx){1to4}, %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x61,0x75,0x10,0x6b,0x82,0xfc,0xfd,0xff,0xff] + vpackssdw -516(%rdx){1to4}, %xmm17, %xmm24 + +// CHECK: vpackssdw %ymm19, %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x21,0x65,0x20,0x6b,0xcb] + vpackssdw %ymm19, %ymm19, %ymm25 + +// CHECK: vpackssdw %ymm19, %ymm19, %ymm25 {%k2} +// CHECK: encoding: [0x62,0x21,0x65,0x22,0x6b,0xcb] + vpackssdw %ymm19, %ymm19, %ymm25 {%k2} + +// CHECK: vpackssdw %ymm19, %ymm19, %ymm25 {%k2} {z} +// CHECK: encoding: [0x62,0x21,0x65,0xa2,0x6b,0xcb] + vpackssdw %ymm19, %ymm19, %ymm25 {%k2} {z} + +// CHECK: vpackssdw (%rcx), %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x20,0x6b,0x09] + vpackssdw (%rcx), %ymm19, %ymm25 + +// CHECK: vpackssdw 4660(%rax,%r14,8), %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x21,0x65,0x20,0x6b,0x8c,0xf0,0x34,0x12,0x00,0x00] + vpackssdw 4660(%rax,%r14,8), %ymm19, %ymm25 + +// CHECK: vpackssdw (%rcx){1to8}, %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x30,0x6b,0x09] + vpackssdw (%rcx){1to8}, %ymm19, %ymm25 + +// CHECK: vpackssdw 4064(%rdx), %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x20,0x6b,0x4a,0x7f] + vpackssdw 4064(%rdx), %ymm19, %ymm25 + +// CHECK: vpackssdw 4096(%rdx), %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x20,0x6b,0x8a,0x00,0x10,0x00,0x00] + vpackssdw 4096(%rdx), %ymm19, %ymm25 + +// CHECK: vpackssdw -4096(%rdx), %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x20,0x6b,0x4a,0x80] + vpackssdw -4096(%rdx), %ymm19, %ymm25 + +// CHECK: vpackssdw -4128(%rdx), %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x20,0x6b,0x8a,0xe0,0xef,0xff,0xff] + vpackssdw -4128(%rdx), %ymm19, %ymm25 + +// CHECK: vpackssdw 508(%rdx){1to8}, %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x30,0x6b,0x4a,0x7f] + vpackssdw 508(%rdx){1to8}, %ymm19, %ymm25 + +// CHECK: vpackssdw 512(%rdx){1to8}, %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x30,0x6b,0x8a,0x00,0x02,0x00,0x00] + vpackssdw 512(%rdx){1to8}, %ymm19, %ymm25 + +// CHECK: vpackssdw -512(%rdx){1to8}, %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x30,0x6b,0x4a,0x80] + vpackssdw -512(%rdx){1to8}, %ymm19, %ymm25 + +// CHECK: vpackssdw -516(%rdx){1to8}, %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x61,0x65,0x30,0x6b,0x8a,0xfc,0xfd,0xff,0xff] + vpackssdw -516(%rdx){1to8}, %ymm19, %ymm25 + +// CHECK: vpacksswb %xmm22, %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x21,0xb5,0x00,0x63,0xee] + vpacksswb %xmm22, %xmm25, %xmm29 + +// CHECK: vpacksswb %xmm22, %xmm25, %xmm29 {%k5} +// CHECK: encoding: [0x62,0x21,0xb5,0x05,0x63,0xee] + vpacksswb %xmm22, %xmm25, %xmm29 {%k5} + +// CHECK: vpacksswb %xmm22, %xmm25, %xmm29 {%k5} {z} +// CHECK: encoding: [0x62,0x21,0xb5,0x85,0x63,0xee] + vpacksswb %xmm22, %xmm25, %xmm29 {%k5} {z} + +// CHECK: vpacksswb (%rcx), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x61,0xb5,0x00,0x63,0x29] + vpacksswb (%rcx), %xmm25, %xmm29 + +// CHECK: vpacksswb 4660(%rax,%r14,8), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x21,0xb5,0x00,0x63,0xac,0xf0,0x34,0x12,0x00,0x00] + vpacksswb 4660(%rax,%r14,8), %xmm25, %xmm29 + +// CHECK: vpacksswb 2032(%rdx), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x61,0xb5,0x00,0x63,0x6a,0x7f] + vpacksswb 2032(%rdx), %xmm25, %xmm29 + +// CHECK: vpacksswb 2048(%rdx), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x61,0xb5,0x00,0x63,0xaa,0x00,0x08,0x00,0x00] + vpacksswb 2048(%rdx), %xmm25, %xmm29 + +// CHECK: vpacksswb -2048(%rdx), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x61,0xb5,0x00,0x63,0x6a,0x80] + vpacksswb -2048(%rdx), %xmm25, %xmm29 + +// CHECK: vpacksswb -2064(%rdx), %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x61,0xb5,0x00,0x63,0xaa,0xf0,0xf7,0xff,0xff] + vpacksswb -2064(%rdx), %xmm25, %xmm29 + +// CHECK: vpacksswb %ymm27, %ymm20, %ymm26 +// CHECK: encoding: [0x62,0x01,0xdd,0x20,0x63,0xd3] + vpacksswb %ymm27, %ymm20, %ymm26 + +// CHECK: vpacksswb %ymm27, %ymm20, %ymm26 {%k6} +// CHECK: encoding: [0x62,0x01,0xdd,0x26,0x63,0xd3] + vpacksswb %ymm27, %ymm20, %ymm26 {%k6} + +// CHECK: vpacksswb %ymm27, %ymm20, %ymm26 {%k6} {z} +// CHECK: encoding: [0x62,0x01,0xdd,0xa6,0x63,0xd3] + vpacksswb %ymm27, %ymm20, %ymm26 {%k6} {z} + +// CHECK: vpacksswb (%rcx), %ymm20, %ymm26 +// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x63,0x11] + vpacksswb (%rcx), %ymm20, %ymm26 + +// CHECK: vpacksswb 4660(%rax,%r14,8), %ymm20, %ymm26 +// CHECK: encoding: [0x62,0x21,0xdd,0x20,0x63,0x94,0xf0,0x34,0x12,0x00,0x00] + vpacksswb 4660(%rax,%r14,8), %ymm20, %ymm26 + +// CHECK: vpacksswb 4064(%rdx), %ymm20, %ymm26 +// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x63,0x52,0x7f] + vpacksswb 4064(%rdx), %ymm20, %ymm26 + +// CHECK: vpacksswb 4096(%rdx), %ymm20, %ymm26 +// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x63,0x92,0x00,0x10,0x00,0x00] + vpacksswb 4096(%rdx), %ymm20, %ymm26 + +// CHECK: vpacksswb -4096(%rdx), %ymm20, %ymm26 +// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x63,0x52,0x80] + vpacksswb -4096(%rdx), %ymm20, %ymm26 + +// CHECK: vpacksswb -4128(%rdx), %ymm20, %ymm26 +// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x63,0x92,0xe0,0xef,0xff,0xff] + vpacksswb -4128(%rdx), %ymm20, %ymm26 + +// CHECK: vpackusdw %xmm18, %xmm27, %xmm28 +// CHECK: encoding: [0x62,0x22,0x25,0x00,0x2b,0xe2] + vpackusdw %xmm18, %xmm27, %xmm28 + +// CHECK: vpackusdw %xmm18, %xmm27, %xmm28 {%k4} +// CHECK: encoding: [0x62,0x22,0x25,0x04,0x2b,0xe2] + vpackusdw %xmm18, %xmm27, %xmm28 {%k4} + +// CHECK: vpackusdw %xmm18, %xmm27, %xmm28 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0x25,0x84,0x2b,0xe2] + vpackusdw %xmm18, %xmm27, %xmm28 {%k4} {z} + +// CHECK: vpackusdw (%rcx), %xmm27, %xmm28 +// CHECK: encoding: [0x62,0x62,0x25,0x00,0x2b,0x21] + vpackusdw (%rcx), %xmm27, %xmm28 + +// CHECK: vpackusdw 4660(%rax,%r14,8), %xmm27, %xmm28 +// CHECK: encoding: [0x62,0x22,0x25,0x00,0x2b,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpackusdw 4660(%rax,%r14,8), %xmm27, %xmm28 + +// CHECK: vpackusdw (%rcx){1to4}, %xmm27, %xmm28 +// CHECK: encoding: [0x62,0x62,0x25,0x10,0x2b,0x21] + vpackusdw (%rcx){1to4}, %xmm27, %xmm28 + +// CHECK: vpackusdw 2032(%rdx), %xmm27, %xmm28 +// CHECK: encoding: [0x62,0x62,0x25,0x00,0x2b,0x62,0x7f] + vpackusdw 2032(%rdx), %xmm27, %xmm28 + +// CHECK: vpackusdw 2048(%rdx), %xmm27, %xmm28 +// CHECK: encoding: [0x62,0x62,0x25,0x00,0x2b,0xa2,0x00,0x08,0x00,0x00] + vpackusdw 2048(%rdx), %xmm27, %xmm28 + +// CHECK: vpackusdw -2048(%rdx), %xmm27, %xmm28 +// CHECK: encoding: [0x62,0x62,0x25,0x00,0x2b,0x62,0x80] + vpackusdw -2048(%rdx), %xmm27, %xmm28 + +// CHECK: vpackusdw -2064(%rdx), %xmm27, %xmm28 +// CHECK: encoding: [0x62,0x62,0x25,0x00,0x2b,0xa2,0xf0,0xf7,0xff,0xff] + vpackusdw -2064(%rdx), %xmm27, %xmm28 + +// CHECK: vpackusdw 508(%rdx){1to4}, %xmm27, %xmm28 +// CHECK: encoding: [0x62,0x62,0x25,0x10,0x2b,0x62,0x7f] + vpackusdw 508(%rdx){1to4}, %xmm27, %xmm28 + +// CHECK: vpackusdw 512(%rdx){1to4}, %xmm27, %xmm28 +// CHECK: encoding: [0x62,0x62,0x25,0x10,0x2b,0xa2,0x00,0x02,0x00,0x00] + vpackusdw 512(%rdx){1to4}, %xmm27, %xmm28 + +// CHECK: vpackusdw -512(%rdx){1to4}, %xmm27, %xmm28 +// CHECK: encoding: [0x62,0x62,0x25,0x10,0x2b,0x62,0x80] + vpackusdw -512(%rdx){1to4}, %xmm27, %xmm28 + +// CHECK: vpackusdw -516(%rdx){1to4}, %xmm27, %xmm28 +// CHECK: encoding: [0x62,0x62,0x25,0x10,0x2b,0xa2,0xfc,0xfd,0xff,0xff] + vpackusdw -516(%rdx){1to4}, %xmm27, %xmm28 + +// CHECK: vpackusdw %ymm17, %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xa2,0x2d,0x20,0x2b,0xf9] + vpackusdw %ymm17, %ymm26, %ymm23 + +// CHECK: vpackusdw %ymm17, %ymm26, %ymm23 {%k4} +// CHECK: encoding: [0x62,0xa2,0x2d,0x24,0x2b,0xf9] + vpackusdw %ymm17, %ymm26, %ymm23 {%k4} + +// CHECK: vpackusdw %ymm17, %ymm26, %ymm23 {%k4} {z} +// CHECK: encoding: [0x62,0xa2,0x2d,0xa4,0x2b,0xf9] + vpackusdw %ymm17, %ymm26, %ymm23 {%k4} {z} + +// CHECK: vpackusdw (%rcx), %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0x2b,0x39] + vpackusdw (%rcx), %ymm26, %ymm23 + +// CHECK: vpackusdw 4660(%rax,%r14,8), %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xa2,0x2d,0x20,0x2b,0xbc,0xf0,0x34,0x12,0x00,0x00] + vpackusdw 4660(%rax,%r14,8), %ymm26, %ymm23 + +// CHECK: vpackusdw (%rcx){1to8}, %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0x2b,0x39] + vpackusdw (%rcx){1to8}, %ymm26, %ymm23 + +// CHECK: vpackusdw 4064(%rdx), %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0x2b,0x7a,0x7f] + vpackusdw 4064(%rdx), %ymm26, %ymm23 + +// CHECK: vpackusdw 4096(%rdx), %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0x2b,0xba,0x00,0x10,0x00,0x00] + vpackusdw 4096(%rdx), %ymm26, %ymm23 + +// CHECK: vpackusdw -4096(%rdx), %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0x2b,0x7a,0x80] + vpackusdw -4096(%rdx), %ymm26, %ymm23 + +// CHECK: vpackusdw -4128(%rdx), %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x20,0x2b,0xba,0xe0,0xef,0xff,0xff] + vpackusdw -4128(%rdx), %ymm26, %ymm23 + +// CHECK: vpackusdw 508(%rdx){1to8}, %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0x2b,0x7a,0x7f] + vpackusdw 508(%rdx){1to8}, %ymm26, %ymm23 + +// CHECK: vpackusdw 512(%rdx){1to8}, %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0x2b,0xba,0x00,0x02,0x00,0x00] + vpackusdw 512(%rdx){1to8}, %ymm26, %ymm23 + +// CHECK: vpackusdw -512(%rdx){1to8}, %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0x2b,0x7a,0x80] + vpackusdw -512(%rdx){1to8}, %ymm26, %ymm23 + +// CHECK: vpackusdw -516(%rdx){1to8}, %ymm26, %ymm23 +// CHECK: encoding: [0x62,0xe2,0x2d,0x30,0x2b,0xba,0xfc,0xfd,0xff,0xff] + vpackusdw -516(%rdx){1to8}, %ymm26, %ymm23 + +// CHECK: vpackuswb %xmm23, %xmm20, %xmm20 +// CHECK: encoding: [0x62,0xa1,0xdd,0x00,0x67,0xe7] + vpackuswb %xmm23, %xmm20, %xmm20 + +// CHECK: vpackuswb %xmm23, %xmm20, %xmm20 {%k2} +// CHECK: encoding: [0x62,0xa1,0xdd,0x02,0x67,0xe7] + vpackuswb %xmm23, %xmm20, %xmm20 {%k2} + +// CHECK: vpackuswb %xmm23, %xmm20, %xmm20 {%k2} {z} +// CHECK: encoding: [0x62,0xa1,0xdd,0x82,0x67,0xe7] + vpackuswb %xmm23, %xmm20, %xmm20 {%k2} {z} + +// CHECK: vpackuswb (%rcx), %xmm20, %xmm20 +// CHECK: encoding: [0x62,0xe1,0xdd,0x00,0x67,0x21] + vpackuswb (%rcx), %xmm20, %xmm20 + +// CHECK: vpackuswb 4660(%rax,%r14,8), %xmm20, %xmm20 +// CHECK: encoding: [0x62,0xa1,0xdd,0x00,0x67,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpackuswb 4660(%rax,%r14,8), %xmm20, %xmm20 + +// CHECK: vpackuswb 2032(%rdx), %xmm20, %xmm20 +// CHECK: encoding: [0x62,0xe1,0xdd,0x00,0x67,0x62,0x7f] + vpackuswb 2032(%rdx), %xmm20, %xmm20 + +// CHECK: vpackuswb 2048(%rdx), %xmm20, %xmm20 +// CHECK: encoding: [0x62,0xe1,0xdd,0x00,0x67,0xa2,0x00,0x08,0x00,0x00] + vpackuswb 2048(%rdx), %xmm20, %xmm20 + +// CHECK: vpackuswb -2048(%rdx), %xmm20, %xmm20 +// CHECK: encoding: [0x62,0xe1,0xdd,0x00,0x67,0x62,0x80] + vpackuswb -2048(%rdx), %xmm20, %xmm20 + +// CHECK: vpackuswb -2064(%rdx), %xmm20, %xmm20 +// CHECK: encoding: [0x62,0xe1,0xdd,0x00,0x67,0xa2,0xf0,0xf7,0xff,0xff] + vpackuswb -2064(%rdx), %xmm20, %xmm20 + +// CHECK: vpackuswb %ymm27, %ymm19, %ymm20 +// CHECK: encoding: [0x62,0x81,0xe5,0x20,0x67,0xe3] + vpackuswb %ymm27, %ymm19, %ymm20 + +// CHECK: vpackuswb %ymm27, %ymm19, %ymm20 {%k5} +// CHECK: encoding: [0x62,0x81,0xe5,0x25,0x67,0xe3] + vpackuswb %ymm27, %ymm19, %ymm20 {%k5} + +// CHECK: vpackuswb %ymm27, %ymm19, %ymm20 {%k5} {z} +// CHECK: encoding: [0x62,0x81,0xe5,0xa5,0x67,0xe3] + vpackuswb %ymm27, %ymm19, %ymm20 {%k5} {z} + +// CHECK: vpackuswb (%rcx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x67,0x21] + vpackuswb (%rcx), %ymm19, %ymm20 + +// CHECK: vpackuswb 4660(%rax,%r14,8), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xa1,0xe5,0x20,0x67,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpackuswb 4660(%rax,%r14,8), %ymm19, %ymm20 + +// CHECK: vpackuswb 4064(%rdx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x67,0x62,0x7f] + vpackuswb 4064(%rdx), %ymm19, %ymm20 + +// CHECK: vpackuswb 4096(%rdx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x67,0xa2,0x00,0x10,0x00,0x00] + vpackuswb 4096(%rdx), %ymm19, %ymm20 + +// CHECK: vpackuswb -4096(%rdx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x67,0x62,0x80] + vpackuswb -4096(%rdx), %ymm19, %ymm20 + +// CHECK: vpackuswb -4128(%rdx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x67,0xa2,0xe0,0xef,0xff,0xff] + vpackuswb -4128(%rdx), %ymm19, %ymm20