AVX-512: Added VPACK* instruction forms for KNL and SKX

and their intrinsics
by Asaf Badouh (asaf.badouh@intel.com)


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236414 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2015-05-04 09:14:02 +00:00
parent bf2b36bae1
commit 869807297d
7 changed files with 2221 additions and 0 deletions

View File

@ -2991,6 +2991,46 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_i64_ty], [IntrNoMem]>;
}
// Pack ops.
// Masked pack intrinsics. Every definition takes two source vectors, a
// pass-through vector of the result type, and an integer mask that is as many
// bits wide as the result has elements.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  // packsswb: i16 sources, i8 result.
  def int_x86_avx512_mask_packsswb_128 :
        GCCBuiltin<"__builtin_ia32_packsswb128_mask">,
        Intrinsic<[llvm_v16i8_ty],
                  [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v16i8_ty, llvm_i16_ty],
                  [IntrNoMem]>;
  def int_x86_avx512_mask_packsswb_256 :
        GCCBuiltin<"__builtin_ia32_packsswb256_mask">,
        Intrinsic<[llvm_v32i8_ty],
                  [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v32i8_ty, llvm_i32_ty],
                  [IntrNoMem]>;
  def int_x86_avx512_mask_packsswb_512 :
        GCCBuiltin<"__builtin_ia32_packsswb512_mask">,
        Intrinsic<[llvm_v64i8_ty],
                  [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v64i8_ty, llvm_i64_ty],
                  [IntrNoMem]>;

  // packssdw: i32 sources, i16 result.
  def int_x86_avx512_mask_packssdw_128 :
        GCCBuiltin<"__builtin_ia32_packssdw128_mask">,
        Intrinsic<[llvm_v8i16_ty],
                  [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty],
                  [IntrNoMem]>;
  def int_x86_avx512_mask_packssdw_256 :
        GCCBuiltin<"__builtin_ia32_packssdw256_mask">,
        Intrinsic<[llvm_v16i16_ty],
                  [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v16i16_ty, llvm_i16_ty],
                  [IntrNoMem]>;
  def int_x86_avx512_mask_packssdw_512 :
        GCCBuiltin<"__builtin_ia32_packssdw512_mask">,
        Intrinsic<[llvm_v32i16_ty],
                  [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v32i16_ty, llvm_i32_ty],
                  [IntrNoMem]>;

  // packuswb: i16 sources, i8 result.
  def int_x86_avx512_mask_packuswb_128 :
        GCCBuiltin<"__builtin_ia32_packuswb128_mask">,
        Intrinsic<[llvm_v16i8_ty],
                  [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v16i8_ty, llvm_i16_ty],
                  [IntrNoMem]>;
  def int_x86_avx512_mask_packuswb_256 :
        GCCBuiltin<"__builtin_ia32_packuswb256_mask">,
        Intrinsic<[llvm_v32i8_ty],
                  [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v32i8_ty, llvm_i32_ty],
                  [IntrNoMem]>;
  def int_x86_avx512_mask_packuswb_512 :
        GCCBuiltin<"__builtin_ia32_packuswb512_mask">,
        Intrinsic<[llvm_v64i8_ty],
                  [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v64i8_ty, llvm_i64_ty],
                  [IntrNoMem]>;

  // packusdw: i32 sources, i16 result.
  def int_x86_avx512_mask_packusdw_128 :
        GCCBuiltin<"__builtin_ia32_packusdw128_mask">,
        Intrinsic<[llvm_v8i16_ty],
                  [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty],
                  [IntrNoMem]>;
  def int_x86_avx512_mask_packusdw_256 :
        GCCBuiltin<"__builtin_ia32_packusdw256_mask">,
        Intrinsic<[llvm_v16i16_ty],
                  [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v16i16_ty, llvm_i16_ty],
                  [IntrNoMem]>;
  def int_x86_avx512_mask_packusdw_512 :
        GCCBuiltin<"__builtin_ia32_packusdw512_mask">,
        Intrinsic<[llvm_v32i16_ty],
                  [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v32i16_ty, llvm_i32_ty],
                  [IntrNoMem]>;
}
// Vector convert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_cvttps2dq_512: GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">,

View File

@ -3118,6 +3118,77 @@ defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
X86pmuludq, 1>;
// Embedded-broadcast memory form of a pack instruction: $src2 is a scalar
// memory operand of the source element type that is broadcast
// (X86VBroadcast) before being fed to OpNode. _Src describes the source
// vector type, _Dst the packed result type.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst> {
  let mayLoad = 1 in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst,
                             (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Src.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Src.BroadcastStr,
                             (_Dst.VT
                               (OpNode
                                 (_Src.VT _Src.RC:$src1),
                                 (bitconvert
                                   (_Src.VT
                                     (X86VBroadcast
                                       (_Src.ScalarLdFrag addr:$src2)))))),
                             "">,
             EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>;
}
// Register-register (rr) and register-memory (rm) forms of a pack
// instruction. Both sources have type _Src.VT; the result has type _Dst.VT.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _Src, X86VectorVTInfo _Dst> {
  // Both sources in registers.
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst,
                            (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT
                              (OpNode (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            "">,
            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V;

  // Second source is a full-width vector load.
  let mayLoad = 1 in
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst,
                            (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT
                              (OpNode (_Src.VT _Src.RC:$src1),
                                      (bitconvert (_Src.LdFrag addr:$src2)))),
                            "">,
            EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>;
}
// i32 -> i16 pack forms (rr, rm and the embedded-broadcast rmb) at 512, 256
// and 128 bits.
//
// The feature predicates are spelled out in full on every form. Previously the
// 256/128-bit forms were wrapped in `let Predicates = [HasVLX]` here while the
// defm site wrapped everything in `let Predicates = [HasBWI]`; since both
// assign the same field, one replaced the other and the narrow forms ended up
// gated on only one of the two features they need.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info>, EVEX_V128;
  }
}

// i16 -> i8 pack forms (rr and rm only; no broadcast form is instantiated)
// at 512, 256 and 128 bits. Predicates are spelled out in full for the same
// reason as in avx512_packs_all_i32_i16.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info,
                                v64i8_info>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info>, EVEX_V128;
  }
}

// Opcodes and prefix classes are unchanged. No enclosing `let Predicates`
// here: the multiclasses above set the complete predicate list per form.
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, PD;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, T8PD;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase, VEX_W;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W;
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax,

View File

@ -353,6 +353,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_or_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
X86_INTRINSIC_DATA(avx512_mask_or_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
X86_INTRINSIC_DATA(avx512_mask_or_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
X86_INTRINSIC_DATA(avx512_mask_packssdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_mask_packssdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_mask_packssdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_mask_packsswb_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_mask_packsswb_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_mask_packsswb_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_mask_packusdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_mask_packusdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_mask_packusdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_mask_packuswb_128, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_mask_packuswb_256, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_mask_packuswb_512, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_b_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_b_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_b_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),

View File

@ -351,3 +351,252 @@ define <8 x i16> @test_x86_mask_blend_w_128(i8 %mask, <8 x i16> %a1, <8 x i16> %
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16>, <8 x i16>, i8) nounwind readonly
; Register-register form with an all-ones mask and zero pass-through: expects the unmasked instruction.
define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_mask_packs_epi32_rr_512
;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
; Register-register form merged into %passThru under %mask: expects the {%k1} merge-masked instruction.
define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rrk_512
;CHECK: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
; Register-register form with zero pass-through under %mask: expects the {%k1} {z} zero-masked instruction.
define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rrkz_512
;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
; Second source loaded from %ptr_b, all-ones mask: expects the load folded into the unmasked instruction.
define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_packs_epi32_rm_512
;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
; Second source loaded from %ptr_b, merged into %passThru under %mask: expects a folded load with {%k1}.
define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rmk_512
;CHECK: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
; Second source loaded from %ptr_b, zero pass-through under %mask: expects a folded load with {%k1} {z}.
define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rmkz_512
;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
; Second source is a scalar load splatted to every lane, all-ones mask: expects the {1to16} broadcast memory form.
define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_packs_epi32_rmb_512
;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
; Splatted scalar load as second source, merged into %passThru under %mask: expects {1to16} with {%k1}.
define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rmbk_512
;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
; Splatted scalar load as second source, zero pass-through under %mask: expects {1to16} with {%k1} {z}.
define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rmbkz_512
;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
; Register-register form with an all-ones mask and zero pass-through: expects the unmasked instruction.
define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
;CHECK-LABEL: test_mask_packs_epi16_rr_512
;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0xc1]
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
ret <64 x i8> %res
}
; Register-register form merged into %passThru under %mask: expects the {%k1} merge-masked instruction.
define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
;CHECK-LABEL: test_mask_packs_epi16_rrk_512
;CHECK: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0xd1]
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
ret <64 x i8> %res
}
; Register-register form with zero pass-through under %mask: expects the {%k1} {z} zero-masked instruction.
define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
;CHECK-LABEL: test_mask_packs_epi16_rrkz_512
;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0xc1]
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
ret <64 x i8> %res
}
; Second source loaded from %ptr_b, all-ones mask: expects the load folded into the unmasked instruction.
define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_packs_epi16_rm_512
;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0x07]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
ret <64 x i8> %res
}
; Second source loaded from %ptr_b, merged into %passThru under %mask: expects a folded load with {%k1}.
define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
;CHECK-LABEL: test_mask_packs_epi16_rmk_512
;CHECK: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0x0f]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
ret <64 x i8> %res
}
; Second source loaded from %ptr_b, zero pass-through under %mask: expects a folded load with {%k1} {z}.
define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
;CHECK-LABEL: test_mask_packs_epi16_rmkz_512
;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0x07]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
ret <64 x i8> %res
}
declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
; Register-register form with an all-ones mask and zero pass-through: expects the unmasked instruction.
define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_mask_packus_epi32_rr_512
;CHECK: vpackusdw %zmm1, %zmm0, %zmm0
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
; Register-register form merged into %passThru under %mask: expects the {%k1} merge-masked instruction.
define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rrk_512
;CHECK: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
; Register-register form with zero pass-through under %mask: expects the {%k1} {z} zero-masked instruction.
define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rrkz_512
;CHECK: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
; Second source loaded from %ptr_b, all-ones mask: expects the load folded into the unmasked instruction.
define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_packus_epi32_rm_512
;CHECK: vpackusdw (%rdi), %zmm0, %zmm0
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
; Second source loaded from %ptr_b, merged into %passThru under %mask: expects a folded load with {%k1}.
define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rmk_512
;CHECK: vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
; Second source loaded from %ptr_b, zero pass-through under %mask: expects a folded load with {%k1} {z}.
define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rmkz_512
;CHECK: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
; Second source is a scalar load splatted to every lane, all-ones mask: expects the {1to16} broadcast memory form.
define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_packus_epi32_rmb_512
;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
; Splatted scalar load as second source, merged into %passThru under %mask: expects {1to16} with {%k1}.
define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rmbk_512
;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
; Splatted scalar load as second source, zero pass-through under %mask: expects {1to16} with {%k1} {z}.
define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rmbkz_512
;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
; Register-register form with an all-ones mask and zero pass-through: expects the unmasked instruction.
define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
;CHECK-LABEL: test_mask_packus_epi16_rr_512
;CHECK: vpackuswb %zmm1, %zmm0, %zmm0
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
ret <64 x i8> %res
}
; Register-register form merged into %passThru under %mask: expects the {%k1} merge-masked instruction.
define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
;CHECK-LABEL: test_mask_packus_epi16_rrk_512
;CHECK: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
ret <64 x i8> %res
}
; Register-register form with zero pass-through under %mask: expects the {%k1} {z} zero-masked instruction.
define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
;CHECK-LABEL: test_mask_packus_epi16_rrkz_512
;CHECK: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
ret <64 x i8> %res
}
; Second source loaded from %ptr_b, all-ones mask: expects the load folded into the unmasked instruction.
define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_packus_epi16_rm_512
;CHECK: vpackuswb (%rdi), %zmm0, %zmm0
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
ret <64 x i8> %res
}
; Second source loaded from %ptr_b, merged into %passThru under %mask: expects a folded load with {%k1}.
define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
;CHECK-LABEL: test_mask_packus_epi16_rmk_512
;CHECK: vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
ret <64 x i8> %res
}
; Second source loaded from %ptr_b, zero pass-through under %mask: expects a folded load with {%k1} {z}.
define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
;CHECK-LABEL: test_mask_packus_epi16_rmkz_512
;CHECK: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
ret <64 x i8> %res
}
declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)

View File

@ -1419,3 +1419,501 @@ define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %pt
declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
; Register-register form with an all-ones mask and zero pass-through: expects the unmasked instruction.
define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
;CHECK-LABEL: test_mask_packs_epi32_rr_128
;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0xc1]
%res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
ret <8 x i16> %res
}
; Register-register form merged into %passThru under %mask: expects the {%k1} merge-masked instruction.
define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rrk_128
;CHECK: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
%res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
ret <8 x i16> %res
}
; Register-register form with zero pass-through under %mask: expects the {%k1} {z} zero-masked instruction.
define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rrkz_128
;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
%res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
; Second source loaded from %ptr_b, all-ones mask: expects the load folded into the unmasked instruction.
define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_packs_epi32_rm_128
;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
ret <8 x i16> %res
}
; Second source loaded from %ptr_b, merged into %passThru under %mask: expects a folded load with {%k1}.
define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rmk_128
;CHECK: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
ret <8 x i16> %res
}
; Second source loaded from %ptr_b, zero pass-through under %mask: expects a folded load with {%k1} {z}.
define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rmkz_128
;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
%b = load <4 x i32>, <4 x i32>* %ptr_b
%res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
; Second source is a scalar load splatted to every lane, all-ones mask: expects the {1to4} broadcast memory form.
define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_packs_epi32_rmb_128
;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
ret <8 x i16> %res
}
; Splatted scalar load as second source, merged into %passThru under %mask: expects {1to4} with {%k1}.
define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rmbk_128
;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
ret <8 x i16> %res
}
; Splatted scalar load as second source, zero pass-through under %mask: expects {1to4} with {%k1} {z}.
define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rmbkz_128
;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
; Register-register form with an all-ones mask and zero pass-through: expects the unmasked instruction.
define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
;CHECK-LABEL: test_mask_packs_epi32_rr_256
;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0xc1]
%res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
ret <16 x i16> %res
}
; Register-register form merged into %passThru under %mask: expects the {%k1} merge-masked instruction.
define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rrk_256
;CHECK: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
%res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
ret <16 x i16> %res
}
; Register-register form with zero pass-through under %mask: expects the {%k1} {z} zero-masked instruction.
define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rrkz_256
;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
%res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
ret <16 x i16> %res
}
; Second source loaded from %ptr_b, all-ones mask: expects the load folded into the unmasked instruction.
define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_packs_epi32_rm_256
;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
ret <16 x i16> %res
}
; Second source loaded from %ptr_b, merged into %passThru under %mask: expects a folded load with {%k1}.
define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rmk_256
;CHECK: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
ret <16 x i16> %res
}
; Second source loaded from %ptr_b, zero pass-through under %mask: expects a folded load with {%k1} {z}.
define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rmkz_256
;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07]
%b = load <8 x i32>, <8 x i32>* %ptr_b
%res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
ret <16 x i16> %res
}
; Second source is a scalar load splatted to every lane, all-ones mask: expects the {1to8} broadcast memory form.
define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_packs_epi32_rmb_256
;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
ret <16 x i16> %res
}
; Splatted scalar load as second source, merged into %passThru under %mask: expects {1to8} with {%k1}.
define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rmbk_256
;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
ret <16 x i16> %res
}
; Splatted scalar load as second source, zero pass-through under %mask: expects {1to8} with {%k1} {z}.
define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_packs_epi32_rmbkz_256
;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
%res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
; Register-register form with an all-ones mask and zero pass-through: expects the unmasked instruction.
define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
;CHECK-LABEL: test_mask_packs_epi16_rr_128
;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc1]
%res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
ret <16 x i8> %res
}
; Register-register form merged into %passThru under %mask: expects the {%k1} merge-masked instruction.
define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_packs_epi16_rrk_128
;CHECK: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x63,0xd1]
%res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
ret <16 x i8> %res
}
; Register-register form with zero pass-through under %mask: expects the {%k1} {z} zero-masked instruction.
define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
;CHECK-LABEL: test_mask_packs_epi16_rrkz_128
;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x63,0xc1]
%res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
ret <16 x i8> %res
}
; Second source loaded from %ptr_b, all-ones mask: expects the load folded into the unmasked instruction.
define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_packs_epi16_rm_128
;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0x07]
%b = load <8 x i16>, <8 x i16>* %ptr_b
%res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
ret <16 x i8> %res
}
; Second source loaded from %ptr_b, merged into %passThru under %mask: expects a folded load with {%k1}.
define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_packs_epi16_rmk_128
;CHECK: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x63,0x0f]
%b = load <8 x i16>, <8 x i16>* %ptr_b
%res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
ret <16 x i8> %res
}
; Second source loaded from %ptr_b, zero pass-through under %mask: expects a folded load with {%k1} {z}.
define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_packs_epi16_rmkz_128
;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x63,0x07]
%b = load <8 x i16>, <8 x i16>* %ptr_b
%res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
; Register-register form with an all-ones mask and zero pass-through: expects the unmasked instruction.
define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
;CHECK-LABEL: test_mask_packs_epi16_rr_256
;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0xc1]
%res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
ret <32 x i8> %res
}
; Register-register form merged into %passThru under %mask: expects the {%k1} merge-masked instruction.
define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_packs_epi16_rrk_256
;CHECK: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x63,0xd1]
%res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
ret <32 x i8> %res
}
; Register-register form with zero pass-through under %mask: expects the {%k1} {z} zero-masked instruction.
define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
;CHECK-LABEL: test_mask_packs_epi16_rrkz_256
;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x63,0xc1]
%res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
ret <32 x i8> %res
}
; packsswb.256 with the second source loaded from %ptr_b and an all-ones mask:
; expects the load folded into an unmasked vpacksswb.
define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_packs_epi16_rm_256
;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0x07]
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %packed = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %rhs, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %packed
}
; packsswb.256, memory + mask + pass-through: the second source is loaded from
; %ptr_b and the expected output folds that load into a {%k1}-masked vpacksswb.
define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_packs_epi16_rmk_256
;CHECK: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x63,0x0f]
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %packed = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %rhs, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %packed
}
; packsswb.256, memory + mask with a zeroinitializer pass-through: expects the
; load from %ptr_b folded into the {%k1} {z} form of vpacksswb.
define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
;CHECK-LABEL: test_mask_packs_epi16_rmkz_256
;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x63,0x07]
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %packed = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %rhs, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %packed
}
; Masked packsswb intrinsic, 256-bit: two <16 x i16> sources, a <32 x i8> vector
; and an i32 (the callers in this file name the last two %passThru and %mask).
declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
; packusdw.128, register-register with an all-ones mask and zeroinitializer
; pass-through: expects a plain, unmasked vpackusdw on xmm registers.
define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
;CHECK-LABEL: test_mask_packus_epi32_rr_128
;CHECK: vpackusdw %xmm1, %xmm0, %xmm0
  %packed = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %packed
}
; packusdw.128, register-register with %mask and %passThru: expects a
; {%k1}-masked vpackusdw whose destination is %xmm2.
define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rrk_128
;CHECK: vpackusdw %xmm1, %xmm0, %xmm2 {%k1}
  %packed = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %packed
}
; packusdw.128, register-register with %mask and a zeroinitializer
; pass-through: expects the {%k1} {z} form of vpackusdw.
define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rrkz_128
;CHECK: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z}
  %packed = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %packed
}
; packusdw.128 with the second source loaded from %ptr_b and an all-ones mask:
; expects the load folded into an unmasked vpackusdw.
define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_packus_epi32_rm_128
;CHECK: vpackusdw (%rdi), %xmm0, %xmm0
  %rhs = load <4 x i32>, <4 x i32>* %ptr_b
  %packed = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %rhs, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %packed
}
; packusdw.128, memory + mask + pass-through: the second source is loaded from
; %ptr_b and the expected output folds that load into a {%k1}-masked vpackusdw.
define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rmk_128
;CHECK: vpackusdw (%rdi), %xmm0, %xmm1 {%k1}
  %rhs = load <4 x i32>, <4 x i32>* %ptr_b
  %packed = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %rhs, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %packed
}
; packusdw.128, memory + mask with a zeroinitializer pass-through: expects the
; load from %ptr_b folded into the {%k1} {z} form of vpackusdw.
define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rmkz_128
;CHECK: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z}
  %rhs = load <4 x i32>, <4 x i32>* %ptr_b
  %packed = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %rhs, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %packed
}
; packusdw.128 with the second source built by loading one i32 from %ptr_b and
; splatting it to all four lanes; all-ones mask. Expects the (%rdi){1to4}
; broadcast memory operand on an unmasked vpackusdw.
define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_packus_epi32_rmb_128
;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm0
  %scalar = load i32, i32* %ptr_b
  %lane0 = insertelement <4 x i32> undef, i32 %scalar, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  %packed = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %splat, <8 x i16> zeroinitializer, i8 -1)
  ret <8 x i16> %packed
}
; packusdw.128 with a splatted i32 loaded from %ptr_b as the second source,
; plus %mask and %passThru. Expects the (%rdi){1to4} broadcast memory operand
; on a {%k1}-masked vpackusdw.
define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rmbk_128
;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1}
  %scalar = load i32, i32* %ptr_b
  %lane0 = insertelement <4 x i32> undef, i32 %scalar, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  %packed = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %splat, <8 x i16> %passThru, i8 %mask)
  ret <8 x i16> %packed
}
; packusdw.128 with a splatted i32 loaded from %ptr_b as the second source,
; %mask, and a zeroinitializer pass-through. Expects the (%rdi){1to4}
; broadcast memory operand on the {%k1} {z} form of vpackusdw.
define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rmbkz_128
;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z}
  %scalar = load i32, i32* %ptr_b
  %lane0 = insertelement <4 x i32> undef, i32 %scalar, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  %packed = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %splat, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %packed
}
; Masked packusdw intrinsic, 128-bit: two <4 x i32> sources, an <8 x i16> vector
; and an i8 (the callers in this file name the last two %passThru and %mask).
declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
; packusdw.256, register-register with an all-ones mask and zeroinitializer
; pass-through: expects a plain, unmasked vpackusdw on ymm registers.
define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
;CHECK-LABEL: test_mask_packus_epi32_rr_256
;CHECK: vpackusdw %ymm1, %ymm0, %ymm0
  %packed = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %packed
}
; packusdw.256, register-register with %mask and %passThru: expects a
; {%k1}-masked vpackusdw whose destination is %ymm2.
define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rrk_256
;CHECK: vpackusdw %ymm1, %ymm0, %ymm2 {%k1}
  %packed = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %packed
}
; packusdw.256, register-register with %mask and a zeroinitializer
; pass-through: expects the {%k1} {z} form of vpackusdw.
define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rrkz_256
;CHECK: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z}
  %packed = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %packed
}
; packusdw.256 with the second source loaded from %ptr_b and an all-ones mask:
; expects the load folded into an unmasked vpackusdw.
define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_packus_epi32_rm_256
;CHECK: vpackusdw (%rdi), %ymm0, %ymm0
  %rhs = load <8 x i32>, <8 x i32>* %ptr_b
  %packed = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %rhs, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %packed
}
; packusdw.256, memory + mask + pass-through: the second source is loaded from
; %ptr_b and the expected output folds that load into a {%k1}-masked vpackusdw.
define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rmk_256
;CHECK: vpackusdw (%rdi), %ymm0, %ymm1 {%k1}
  %rhs = load <8 x i32>, <8 x i32>* %ptr_b
  %packed = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %rhs, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %packed
}
; packusdw.256, memory + mask with a zeroinitializer pass-through: expects the
; load from %ptr_b folded into the {%k1} {z} form of vpackusdw.
define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rmkz_256
;CHECK: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z}
  %rhs = load <8 x i32>, <8 x i32>* %ptr_b
  %packed = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %rhs, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %packed
}
; packusdw.256 with the second source built by loading one i32 from %ptr_b and
; splatting it to all eight lanes; all-ones mask. Expects the (%rdi){1to8}
; broadcast memory operand on an unmasked vpackusdw.
define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_packus_epi32_rmb_256
;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm0
  %scalar = load i32, i32* %ptr_b
  %lane0 = insertelement <8 x i32> undef, i32 %scalar, i32 0
  %splat = shufflevector <8 x i32> %lane0, <8 x i32> undef, <8 x i32> zeroinitializer
  %packed = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %splat, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %packed
}
; packusdw.256 with a splatted i32 loaded from %ptr_b as the second source,
; plus %mask and %passThru. Expects the (%rdi){1to8} broadcast memory operand
; on a {%k1}-masked vpackusdw.
define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rmbk_256
;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1}
  %scalar = load i32, i32* %ptr_b
  %lane0 = insertelement <8 x i32> undef, i32 %scalar, i32 0
  %splat = shufflevector <8 x i32> %lane0, <8 x i32> undef, <8 x i32> zeroinitializer
  %packed = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %splat, <16 x i16> %passThru, i16 %mask)
  ret <16 x i16> %packed
}
; packusdw.256 with a splatted i32 loaded from %ptr_b as the second source,
; %mask, and a zeroinitializer pass-through. Expects the (%rdi){1to8}
; broadcast memory operand on the {%k1} {z} form of vpackusdw.
define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_packus_epi32_rmbkz_256
;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z}
  %scalar = load i32, i32* %ptr_b
  %lane0 = insertelement <8 x i32> undef, i32 %scalar, i32 0
  %splat = shufflevector <8 x i32> %lane0, <8 x i32> undef, <8 x i32> zeroinitializer
  %packed = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %splat, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %packed
}
; Masked packusdw intrinsic, 256-bit: two <8 x i32> sources, a <16 x i16> vector
; and an i16 (the callers in this file name the last two %passThru and %mask).
declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
; packuswb.128, register-register with an all-ones mask and zeroinitializer
; pass-through: expects a plain, unmasked vpackuswb on xmm registers.
define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
;CHECK-LABEL: test_mask_packus_epi16_rr_128
;CHECK: vpackuswb %xmm1, %xmm0, %xmm0
  %packed = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %packed
}
; packuswb.128, register-register with %mask and %passThru: expects a
; {%k1}-masked vpackuswb whose destination is %xmm2.
define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_packus_epi16_rrk_128
;CHECK: vpackuswb %xmm1, %xmm0, %xmm2 {%k1}
  %packed = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %packed
}
; packuswb.128, register-register with %mask and a zeroinitializer
; pass-through: expects the {%k1} {z} form of vpackuswb.
define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
;CHECK-LABEL: test_mask_packus_epi16_rrkz_128
;CHECK: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z}
  %packed = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %packed
}
; packuswb.128 with the second source loaded from %ptr_b and an all-ones mask:
; expects the load folded into an unmasked vpackuswb.
define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_packus_epi16_rm_128
;CHECK: vpackuswb (%rdi), %xmm0, %xmm0
  %rhs = load <8 x i16>, <8 x i16>* %ptr_b
  %packed = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %rhs, <16 x i8> zeroinitializer, i16 -1)
  ret <16 x i8> %packed
}
; packuswb.128, memory + mask + pass-through: the second source is loaded from
; %ptr_b and the expected output folds that load into a {%k1}-masked vpackuswb.
define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_packus_epi16_rmk_128
;CHECK: vpackuswb (%rdi), %xmm0, %xmm1 {%k1}
  %rhs = load <8 x i16>, <8 x i16>* %ptr_b
  %packed = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %rhs, <16 x i8> %passThru, i16 %mask)
  ret <16 x i8> %packed
}
; packuswb.128, memory + mask with a zeroinitializer pass-through: expects the
; load from %ptr_b folded into the {%k1} {z} form of vpackuswb.
define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_packus_epi16_rmkz_128
;CHECK: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z}
  %rhs = load <8 x i16>, <8 x i16>* %ptr_b
  %packed = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %rhs, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %packed
}
; Masked packuswb intrinsic, 128-bit: two <8 x i16> sources, a <16 x i8> vector
; and an i16 (the callers in this file name the last two %passThru and %mask).
declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
; packuswb.256, register-register with an all-ones mask and zeroinitializer
; pass-through: expects a plain, unmasked vpackuswb on ymm registers.
define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
;CHECK-LABEL: test_mask_packus_epi16_rr_256
;CHECK: vpackuswb %ymm1, %ymm0, %ymm0
  %packed = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %packed
}
; packuswb.256, register-register with %mask and %passThru: expects a
; {%k1}-masked vpackuswb whose destination is %ymm2.
define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_packus_epi16_rrk_256
;CHECK: vpackuswb %ymm1, %ymm0, %ymm2 {%k1}
  %packed = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %packed
}
; packuswb.256, register-register with %mask and a zeroinitializer
; pass-through: expects the {%k1} {z} form of vpackuswb.
define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
;CHECK-LABEL: test_mask_packus_epi16_rrkz_256
;CHECK: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z}
  %packed = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %packed
}
; packuswb.256 with the second source loaded from %ptr_b and an all-ones mask:
; expects the load folded into an unmasked vpackuswb.
define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_packus_epi16_rm_256
;CHECK: vpackuswb (%rdi), %ymm0, %ymm0
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %packed = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %rhs, <32 x i8> zeroinitializer, i32 -1)
  ret <32 x i8> %packed
}
; packuswb.256, memory + mask + pass-through: the second source is loaded from
; %ptr_b and the expected output folds that load into a {%k1}-masked vpackuswb.
define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_packus_epi16_rmk_256
;CHECK: vpackuswb (%rdi), %ymm0, %ymm1 {%k1}
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %packed = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %rhs, <32 x i8> %passThru, i32 %mask)
  ret <32 x i8> %packed
}
; packuswb.256, memory + mask with a zeroinitializer pass-through: expects the
; load from %ptr_b folded into the {%k1} {z} form of vpackuswb.
define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
;CHECK-LABEL: test_mask_packus_epi16_rmkz_256
;CHECK: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z}
  %rhs = load <16 x i16>, <16 x i16>* %ptr_b
  %packed = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %rhs, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %packed
}
; Masked packuswb intrinsic, 256-bit: two <16 x i16> sources, a <32 x i8> vector
; and an i32 (the callers in this file name the last two %passThru and %mask).
declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)

View File

@ -2008,3 +2008,330 @@
// CHECK: encoding: [0x62,0xe2,0x95,0x40,0x11,0x9a,0xc0,0xdf,0xff,0xff]
vpsravw -8256(%rdx), %zmm29, %zmm19
// vpacksswb, 512-bit, first source %zmm29, destination %zmm21.
// Register form (%zmm18) unmasked, with {%k1}, and with {%k1} {z}; then memory
// operands: plain base, base+index*8+disp32, and displacements around the
// one-byte limit (8128 / -8192 encode in one byte, 8192 / -8256 take four).
// CHECK: vpacksswb %zmm18, %zmm29, %zmm21
// CHECK: encoding: [0x62,0xa1,0x95,0x40,0x63,0xea]
vpacksswb %zmm18, %zmm29, %zmm21
// CHECK: vpacksswb %zmm18, %zmm29, %zmm21 {%k1}
// CHECK: encoding: [0x62,0xa1,0x95,0x41,0x63,0xea]
vpacksswb %zmm18, %zmm29, %zmm21 {%k1}
// CHECK: vpacksswb %zmm18, %zmm29, %zmm21 {%k1} {z}
// CHECK: encoding: [0x62,0xa1,0x95,0xc1,0x63,0xea]
vpacksswb %zmm18, %zmm29, %zmm21 {%k1} {z}
// CHECK: vpacksswb (%rcx), %zmm29, %zmm21
// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x63,0x29]
vpacksswb (%rcx), %zmm29, %zmm21
// CHECK: vpacksswb 291(%rax,%r14,8), %zmm29, %zmm21
// CHECK: encoding: [0x62,0xa1,0x95,0x40,0x63,0xac,0xf0,0x23,0x01,0x00,0x00]
vpacksswb 291(%rax,%r14,8), %zmm29, %zmm21
// CHECK: vpacksswb 8128(%rdx), %zmm29, %zmm21
// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x63,0x6a,0x7f]
vpacksswb 8128(%rdx), %zmm29, %zmm21
// CHECK: vpacksswb 8192(%rdx), %zmm29, %zmm21
// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x63,0xaa,0x00,0x20,0x00,0x00]
vpacksswb 8192(%rdx), %zmm29, %zmm21
// CHECK: vpacksswb -8192(%rdx), %zmm29, %zmm21
// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x63,0x6a,0x80]
vpacksswb -8192(%rdx), %zmm29, %zmm21
// CHECK: vpacksswb -8256(%rdx), %zmm29, %zmm21
// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x63,0xaa,0xc0,0xdf,0xff,0xff]
vpacksswb -8256(%rdx), %zmm29, %zmm21
// vpackuswb, 512-bit, first source %zmm24, destination %zmm28.
// Register form (%zmm28) unmasked, with {%k6}, and with {%k6} {z}; then memory
// operands: plain base, base+index*8+disp32, and displacements around the
// one-byte limit (8128 / -8192 encode in one byte, 8192 / -8256 take four).
// CHECK: vpackuswb %zmm28, %zmm24, %zmm28
// CHECK: encoding: [0x62,0x01,0xbd,0x40,0x67,0xe4]
vpackuswb %zmm28, %zmm24, %zmm28
// CHECK: vpackuswb %zmm28, %zmm24, %zmm28 {%k6}
// CHECK: encoding: [0x62,0x01,0xbd,0x46,0x67,0xe4]
vpackuswb %zmm28, %zmm24, %zmm28 {%k6}
// CHECK: vpackuswb %zmm28, %zmm24, %zmm28 {%k6} {z}
// CHECK: encoding: [0x62,0x01,0xbd,0xc6,0x67,0xe4]
vpackuswb %zmm28, %zmm24, %zmm28 {%k6} {z}
// CHECK: vpackuswb (%rcx), %zmm24, %zmm28
// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x67,0x21]
vpackuswb (%rcx), %zmm24, %zmm28
// CHECK: vpackuswb 291(%rax,%r14,8), %zmm24, %zmm28
// CHECK: encoding: [0x62,0x21,0xbd,0x40,0x67,0xa4,0xf0,0x23,0x01,0x00,0x00]
vpackuswb 291(%rax,%r14,8), %zmm24, %zmm28
// CHECK: vpackuswb 8128(%rdx), %zmm24, %zmm28
// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x67,0x62,0x7f]
vpackuswb 8128(%rdx), %zmm24, %zmm28
// CHECK: vpackuswb 8192(%rdx), %zmm24, %zmm28
// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x67,0xa2,0x00,0x20,0x00,0x00]
vpackuswb 8192(%rdx), %zmm24, %zmm28
// CHECK: vpackuswb -8192(%rdx), %zmm24, %zmm28
// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x67,0x62,0x80]
vpackuswb -8192(%rdx), %zmm24, %zmm28
// CHECK: vpackuswb -8256(%rdx), %zmm24, %zmm28
// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x67,0xa2,0xc0,0xdf,0xff,0xff]
vpackuswb -8256(%rdx), %zmm24, %zmm28
// vpacksswb, 512-bit, first source %zmm29, destination %zmm26.
// Register form (%zmm24) unmasked, with {%k6}, and with {%k6} {z}; then memory
// operands: plain base, base+index*8+disp32, and displacements around the
// one-byte limit (8128 / -8192 encode in one byte, 8192 / -8256 take four).
// CHECK: vpacksswb %zmm24, %zmm29, %zmm26
// CHECK: encoding: [0x62,0x01,0x95,0x40,0x63,0xd0]
vpacksswb %zmm24, %zmm29, %zmm26
// CHECK: vpacksswb %zmm24, %zmm29, %zmm26 {%k6}
// CHECK: encoding: [0x62,0x01,0x95,0x46,0x63,0xd0]
vpacksswb %zmm24, %zmm29, %zmm26 {%k6}
// CHECK: vpacksswb %zmm24, %zmm29, %zmm26 {%k6} {z}
// CHECK: encoding: [0x62,0x01,0x95,0xc6,0x63,0xd0]
vpacksswb %zmm24, %zmm29, %zmm26 {%k6} {z}
// CHECK: vpacksswb (%rcx), %zmm29, %zmm26
// CHECK: encoding: [0x62,0x61,0x95,0x40,0x63,0x11]
vpacksswb (%rcx), %zmm29, %zmm26
// CHECK: vpacksswb 4660(%rax,%r14,8), %zmm29, %zmm26
// CHECK: encoding: [0x62,0x21,0x95,0x40,0x63,0x94,0xf0,0x34,0x12,0x00,0x00]
vpacksswb 4660(%rax,%r14,8), %zmm29, %zmm26
// CHECK: vpacksswb 8128(%rdx), %zmm29, %zmm26
// CHECK: encoding: [0x62,0x61,0x95,0x40,0x63,0x52,0x7f]
vpacksswb 8128(%rdx), %zmm29, %zmm26
// CHECK: vpacksswb 8192(%rdx), %zmm29, %zmm26
// CHECK: encoding: [0x62,0x61,0x95,0x40,0x63,0x92,0x00,0x20,0x00,0x00]
vpacksswb 8192(%rdx), %zmm29, %zmm26
// CHECK: vpacksswb -8192(%rdx), %zmm29, %zmm26
// CHECK: encoding: [0x62,0x61,0x95,0x40,0x63,0x52,0x80]
vpacksswb -8192(%rdx), %zmm29, %zmm26
// CHECK: vpacksswb -8256(%rdx), %zmm29, %zmm26
// CHECK: encoding: [0x62,0x61,0x95,0x40,0x63,0x92,0xc0,0xdf,0xff,0xff]
vpacksswb -8256(%rdx), %zmm29, %zmm26
// vpackuswb, 512-bit, first source %zmm28, destination %zmm18.
// Register form (%zmm23) unmasked, with {%k4}, and with {%k4} {z}; then memory
// operands: plain base, base+index*8+disp32, and displacements around the
// one-byte limit (8128 / -8192 encode in one byte, 8192 / -8256 take four).
// CHECK: vpackuswb %zmm23, %zmm28, %zmm18
// CHECK: encoding: [0x62,0xa1,0x9d,0x40,0x67,0xd7]
vpackuswb %zmm23, %zmm28, %zmm18
// CHECK: vpackuswb %zmm23, %zmm28, %zmm18 {%k4}
// CHECK: encoding: [0x62,0xa1,0x9d,0x44,0x67,0xd7]
vpackuswb %zmm23, %zmm28, %zmm18 {%k4}
// CHECK: vpackuswb %zmm23, %zmm28, %zmm18 {%k4} {z}
// CHECK: encoding: [0x62,0xa1,0x9d,0xc4,0x67,0xd7]
vpackuswb %zmm23, %zmm28, %zmm18 {%k4} {z}
// CHECK: vpackuswb (%rcx), %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x67,0x11]
vpackuswb (%rcx), %zmm28, %zmm18
// CHECK: vpackuswb 4660(%rax,%r14,8), %zmm28, %zmm18
// CHECK: encoding: [0x62,0xa1,0x9d,0x40,0x67,0x94,0xf0,0x34,0x12,0x00,0x00]
vpackuswb 4660(%rax,%r14,8), %zmm28, %zmm18
// CHECK: vpackuswb 8128(%rdx), %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x67,0x52,0x7f]
vpackuswb 8128(%rdx), %zmm28, %zmm18
// CHECK: vpackuswb 8192(%rdx), %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x67,0x92,0x00,0x20,0x00,0x00]
vpackuswb 8192(%rdx), %zmm28, %zmm18
// CHECK: vpackuswb -8192(%rdx), %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x67,0x52,0x80]
vpackuswb -8192(%rdx), %zmm28, %zmm18
// CHECK: vpackuswb -8256(%rdx), %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x67,0x92,0xc0,0xdf,0xff,0xff]
vpackuswb -8256(%rdx), %zmm28, %zmm18
// vpackssdw, 512-bit, first source %zmm27, destination %zmm24.
// Register form (%zmm22) unmasked, with {%k2}, and with {%k2} {z}; then full
// memory operands (8128 / -8192 encode in one displacement byte, 8192 / -8256
// take four) and {1to16} broadcast memory operands (508 / -512 encode in one
// displacement byte, 512 / -516 take four).
// CHECK: vpackssdw %zmm22, %zmm27, %zmm24
// CHECK: encoding: [0x62,0x21,0x25,0x40,0x6b,0xc6]
vpackssdw %zmm22, %zmm27, %zmm24
// CHECK: vpackssdw %zmm22, %zmm27, %zmm24 {%k2}
// CHECK: encoding: [0x62,0x21,0x25,0x42,0x6b,0xc6]
vpackssdw %zmm22, %zmm27, %zmm24 {%k2}
// CHECK: vpackssdw %zmm22, %zmm27, %zmm24 {%k2} {z}
// CHECK: encoding: [0x62,0x21,0x25,0xc2,0x6b,0xc6]
vpackssdw %zmm22, %zmm27, %zmm24 {%k2} {z}
// CHECK: vpackssdw (%rcx), %zmm27, %zmm24
// CHECK: encoding: [0x62,0x61,0x25,0x40,0x6b,0x01]
vpackssdw (%rcx), %zmm27, %zmm24
// CHECK: vpackssdw 291(%rax,%r14,8), %zmm27, %zmm24
// CHECK: encoding: [0x62,0x21,0x25,0x40,0x6b,0x84,0xf0,0x23,0x01,0x00,0x00]
vpackssdw 291(%rax,%r14,8), %zmm27, %zmm24
// CHECK: vpackssdw (%rcx){1to16}, %zmm27, %zmm24
// CHECK: encoding: [0x62,0x61,0x25,0x50,0x6b,0x01]
vpackssdw (%rcx){1to16}, %zmm27, %zmm24
// CHECK: vpackssdw 8128(%rdx), %zmm27, %zmm24
// CHECK: encoding: [0x62,0x61,0x25,0x40,0x6b,0x42,0x7f]
vpackssdw 8128(%rdx), %zmm27, %zmm24
// CHECK: vpackssdw 8192(%rdx), %zmm27, %zmm24
// CHECK: encoding: [0x62,0x61,0x25,0x40,0x6b,0x82,0x00,0x20,0x00,0x00]
vpackssdw 8192(%rdx), %zmm27, %zmm24
// CHECK: vpackssdw -8192(%rdx), %zmm27, %zmm24
// CHECK: encoding: [0x62,0x61,0x25,0x40,0x6b,0x42,0x80]
vpackssdw -8192(%rdx), %zmm27, %zmm24
// CHECK: vpackssdw -8256(%rdx), %zmm27, %zmm24
// CHECK: encoding: [0x62,0x61,0x25,0x40,0x6b,0x82,0xc0,0xdf,0xff,0xff]
vpackssdw -8256(%rdx), %zmm27, %zmm24
// CHECK: vpackssdw 508(%rdx){1to16}, %zmm27, %zmm24
// CHECK: encoding: [0x62,0x61,0x25,0x50,0x6b,0x42,0x7f]
vpackssdw 508(%rdx){1to16}, %zmm27, %zmm24
// CHECK: vpackssdw 512(%rdx){1to16}, %zmm27, %zmm24
// CHECK: encoding: [0x62,0x61,0x25,0x50,0x6b,0x82,0x00,0x02,0x00,0x00]
vpackssdw 512(%rdx){1to16}, %zmm27, %zmm24
// CHECK: vpackssdw -512(%rdx){1to16}, %zmm27, %zmm24
// CHECK: encoding: [0x62,0x61,0x25,0x50,0x6b,0x42,0x80]
vpackssdw -512(%rdx){1to16}, %zmm27, %zmm24
// CHECK: vpackssdw -516(%rdx){1to16}, %zmm27, %zmm24
// CHECK: encoding: [0x62,0x61,0x25,0x50,0x6b,0x82,0xfc,0xfd,0xff,0xff]
vpackssdw -516(%rdx){1to16}, %zmm27, %zmm24
// vpacksswb, 512-bit, first source %zmm18, destination %zmm28.
// Register form (%zmm23) unmasked, with {%k3}, and with {%k3} {z}; then memory
// operands: plain base, base+index*8+disp32, and displacements around the
// one-byte limit (8128 / -8192 encode in one byte, 8192 / -8256 take four).
// CHECK: vpacksswb %zmm23, %zmm18, %zmm28
// CHECK: encoding: [0x62,0x21,0xed,0x40,0x63,0xe7]
vpacksswb %zmm23, %zmm18, %zmm28
// CHECK: vpacksswb %zmm23, %zmm18, %zmm28 {%k3}
// CHECK: encoding: [0x62,0x21,0xed,0x43,0x63,0xe7]
vpacksswb %zmm23, %zmm18, %zmm28 {%k3}
// CHECK: vpacksswb %zmm23, %zmm18, %zmm28 {%k3} {z}
// CHECK: encoding: [0x62,0x21,0xed,0xc3,0x63,0xe7]
vpacksswb %zmm23, %zmm18, %zmm28 {%k3} {z}
// CHECK: vpacksswb (%rcx), %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x40,0x63,0x21]
vpacksswb (%rcx), %zmm18, %zmm28
// CHECK: vpacksswb 291(%rax,%r14,8), %zmm18, %zmm28
// CHECK: encoding: [0x62,0x21,0xed,0x40,0x63,0xa4,0xf0,0x23,0x01,0x00,0x00]
vpacksswb 291(%rax,%r14,8), %zmm18, %zmm28
// CHECK: vpacksswb 8128(%rdx), %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x40,0x63,0x62,0x7f]
vpacksswb 8128(%rdx), %zmm18, %zmm28
// CHECK: vpacksswb 8192(%rdx), %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x40,0x63,0xa2,0x00,0x20,0x00,0x00]
vpacksswb 8192(%rdx), %zmm18, %zmm28
// CHECK: vpacksswb -8192(%rdx), %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x40,0x63,0x62,0x80]
vpacksswb -8192(%rdx), %zmm18, %zmm28
// CHECK: vpacksswb -8256(%rdx), %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x40,0x63,0xa2,0xc0,0xdf,0xff,0xff]
vpacksswb -8256(%rdx), %zmm18, %zmm28
// vpackusdw, 512-bit, first source %zmm29, destination %zmm18.
// Register form (%zmm21) unmasked, with {%k2}, and with {%k2} {z}; then full
// memory operands (8128 / -8192 encode in one displacement byte, 8192 / -8256
// take four) and {1to16} broadcast memory operands (508 / -512 encode in one
// displacement byte, 512 / -516 take four).
// CHECK: vpackusdw %zmm21, %zmm29, %zmm18
// CHECK: encoding: [0x62,0xa2,0x15,0x40,0x2b,0xd5]
vpackusdw %zmm21, %zmm29, %zmm18
// CHECK: vpackusdw %zmm21, %zmm29, %zmm18 {%k2}
// CHECK: encoding: [0x62,0xa2,0x15,0x42,0x2b,0xd5]
vpackusdw %zmm21, %zmm29, %zmm18 {%k2}
// CHECK: vpackusdw %zmm21, %zmm29, %zmm18 {%k2} {z}
// CHECK: encoding: [0x62,0xa2,0x15,0xc2,0x2b,0xd5]
vpackusdw %zmm21, %zmm29, %zmm18 {%k2} {z}
// CHECK: vpackusdw (%rcx), %zmm29, %zmm18
// CHECK: encoding: [0x62,0xe2,0x15,0x40,0x2b,0x11]
vpackusdw (%rcx), %zmm29, %zmm18
// CHECK: vpackusdw 291(%rax,%r14,8), %zmm29, %zmm18
// CHECK: encoding: [0x62,0xa2,0x15,0x40,0x2b,0x94,0xf0,0x23,0x01,0x00,0x00]
vpackusdw 291(%rax,%r14,8), %zmm29, %zmm18
// CHECK: vpackusdw (%rcx){1to16}, %zmm29, %zmm18
// CHECK: encoding: [0x62,0xe2,0x15,0x50,0x2b,0x11]
vpackusdw (%rcx){1to16}, %zmm29, %zmm18
// CHECK: vpackusdw 8128(%rdx), %zmm29, %zmm18
// CHECK: encoding: [0x62,0xe2,0x15,0x40,0x2b,0x52,0x7f]
vpackusdw 8128(%rdx), %zmm29, %zmm18
// CHECK: vpackusdw 8192(%rdx), %zmm29, %zmm18
// CHECK: encoding: [0x62,0xe2,0x15,0x40,0x2b,0x92,0x00,0x20,0x00,0x00]
vpackusdw 8192(%rdx), %zmm29, %zmm18
// CHECK: vpackusdw -8192(%rdx), %zmm29, %zmm18
// CHECK: encoding: [0x62,0xe2,0x15,0x40,0x2b,0x52,0x80]
vpackusdw -8192(%rdx), %zmm29, %zmm18
// CHECK: vpackusdw -8256(%rdx), %zmm29, %zmm18
// CHECK: encoding: [0x62,0xe2,0x15,0x40,0x2b,0x92,0xc0,0xdf,0xff,0xff]
vpackusdw -8256(%rdx), %zmm29, %zmm18
// CHECK: vpackusdw 508(%rdx){1to16}, %zmm29, %zmm18
// CHECK: encoding: [0x62,0xe2,0x15,0x50,0x2b,0x52,0x7f]
vpackusdw 508(%rdx){1to16}, %zmm29, %zmm18
// CHECK: vpackusdw 512(%rdx){1to16}, %zmm29, %zmm18
// CHECK: encoding: [0x62,0xe2,0x15,0x50,0x2b,0x92,0x00,0x02,0x00,0x00]
vpackusdw 512(%rdx){1to16}, %zmm29, %zmm18
// CHECK: vpackusdw -512(%rdx){1to16}, %zmm29, %zmm18
// CHECK: encoding: [0x62,0xe2,0x15,0x50,0x2b,0x52,0x80]
vpackusdw -512(%rdx){1to16}, %zmm29, %zmm18
// CHECK: vpackusdw -516(%rdx){1to16}, %zmm29, %zmm18
// CHECK: encoding: [0x62,0xe2,0x15,0x50,0x2b,0x92,0xfc,0xfd,0xff,0xff]
vpackusdw -516(%rdx){1to16}, %zmm29, %zmm18
// vpackuswb, 512-bit, first source %zmm18, destination %zmm23.
// Register form (%zmm27) unmasked, with {%k5}, and with {%k5} {z}; then memory
// operands: plain base, base+index*8+disp32, and displacements around the
// one-byte limit (8128 / -8192 encode in one byte, 8192 / -8256 take four).
// CHECK: vpackuswb %zmm27, %zmm18, %zmm23
// CHECK: encoding: [0x62,0x81,0xed,0x40,0x67,0xfb]
vpackuswb %zmm27, %zmm18, %zmm23
// CHECK: vpackuswb %zmm27, %zmm18, %zmm23 {%k5}
// CHECK: encoding: [0x62,0x81,0xed,0x45,0x67,0xfb]
vpackuswb %zmm27, %zmm18, %zmm23 {%k5}
// CHECK: vpackuswb %zmm27, %zmm18, %zmm23 {%k5} {z}
// CHECK: encoding: [0x62,0x81,0xed,0xc5,0x67,0xfb]
vpackuswb %zmm27, %zmm18, %zmm23 {%k5} {z}
// CHECK: vpackuswb (%rcx), %zmm18, %zmm23
// CHECK: encoding: [0x62,0xe1,0xed,0x40,0x67,0x39]
vpackuswb (%rcx), %zmm18, %zmm23
// CHECK: vpackuswb 291(%rax,%r14,8), %zmm18, %zmm23
// CHECK: encoding: [0x62,0xa1,0xed,0x40,0x67,0xbc,0xf0,0x23,0x01,0x00,0x00]
vpackuswb 291(%rax,%r14,8), %zmm18, %zmm23
// CHECK: vpackuswb 8128(%rdx), %zmm18, %zmm23
// CHECK: encoding: [0x62,0xe1,0xed,0x40,0x67,0x7a,0x7f]
vpackuswb 8128(%rdx), %zmm18, %zmm23
// CHECK: vpackuswb 8192(%rdx), %zmm18, %zmm23
// CHECK: encoding: [0x62,0xe1,0xed,0x40,0x67,0xba,0x00,0x20,0x00,0x00]
vpackuswb 8192(%rdx), %zmm18, %zmm23
// CHECK: vpackuswb -8192(%rdx), %zmm18, %zmm23
// CHECK: encoding: [0x62,0xe1,0xed,0x40,0x67,0x7a,0x80]
vpackuswb -8192(%rdx), %zmm18, %zmm23
// CHECK: vpackuswb -8256(%rdx), %zmm18, %zmm23
// CHECK: encoding: [0x62,0xe1,0xed,0x40,0x67,0xba,0xc0,0xdf,0xff,0xff]
vpackuswb -8256(%rdx), %zmm18, %zmm23

File diff suppressed because it is too large Load Diff