mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
AVX-512: added integer "add" and "sub" instructions with saturation for SKX,
with intrinsics and tests.
By Asaf Badouh (asaf.badouh@intel.com).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236418 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
44ea6d9cba
commit
70a6f4522a
@ -3425,6 +3425,42 @@ let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_mask_padd_w_512 : GCCBuiltin<"__builtin_ia32_paddw512_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padds_b_128 : GCCBuiltin<"__builtin_ia32_paddsb128_mask">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padds_b_256 : GCCBuiltin<"__builtin_ia32_paddsb256_mask">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
|
||||
llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padds_b_512 : GCCBuiltin<"__builtin_ia32_paddsb512_mask">,
|
||||
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
|
||||
llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padds_w_128 : GCCBuiltin<"__builtin_ia32_paddsw128_mask">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
|
||||
llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padds_w_256 : GCCBuiltin<"__builtin_ia32_paddsw256_mask">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
|
||||
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padds_w_512 : GCCBuiltin<"__builtin_ia32_paddsw512_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_paddus_b_128 : GCCBuiltin<"__builtin_ia32_paddusb128_mask">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_paddus_b_256 : GCCBuiltin<"__builtin_ia32_paddusb256_mask">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
|
||||
llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_paddus_b_512 : GCCBuiltin<"__builtin_ia32_paddusb512_mask">,
|
||||
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
|
||||
llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_paddus_w_128 : GCCBuiltin<"__builtin_ia32_paddusw128_mask">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
|
||||
llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_paddus_w_256 : GCCBuiltin<"__builtin_ia32_paddusw256_mask">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
|
||||
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_paddus_w_512 : GCCBuiltin<"__builtin_ia32_paddusw512_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_padd_d_128 : GCCBuiltin<"__builtin_ia32_paddd128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
|
||||
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
@ -3461,6 +3497,42 @@ let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_mask_psub_w_512 : GCCBuiltin<"__builtin_ia32_psubw512_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psubs_b_128 : GCCBuiltin<"__builtin_ia32_psubsb128_mask">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psubs_b_256 : GCCBuiltin<"__builtin_ia32_psubsb256_mask">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
|
||||
llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psubs_b_512 : GCCBuiltin<"__builtin_ia32_psubsb512_mask">,
|
||||
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
|
||||
llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psubs_w_128 : GCCBuiltin<"__builtin_ia32_psubsw128_mask">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
|
||||
llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psubs_w_256 : GCCBuiltin<"__builtin_ia32_psubsw256_mask">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
|
||||
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psubs_w_512 : GCCBuiltin<"__builtin_ia32_psubsw512_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psubus_b_128 : GCCBuiltin<"__builtin_ia32_psubusb128_mask">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psubus_b_256 : GCCBuiltin<"__builtin_ia32_psubusb256_mask">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
|
||||
llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psubus_b_512 : GCCBuiltin<"__builtin_ia32_psubusb512_mask">,
|
||||
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
|
||||
llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psubus_w_128 : GCCBuiltin<"__builtin_ia32_psubusw128_mask">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
|
||||
llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psubus_w_256 : GCCBuiltin<"__builtin_ia32_psubusw256_mask">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
|
||||
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psubus_w_512 : GCCBuiltin<"__builtin_ia32_psubusw512_mask">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_psub_d_128 : GCCBuiltin<"__builtin_ia32_psubd128_mask">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
|
||||
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
|
@ -206,8 +206,12 @@ namespace llvm {
|
||||
FMUL_RND,
|
||||
FDIV_RND,
|
||||
|
||||
// Integer sub with unsigned saturation.
|
||||
// Integer add/sub with unsigned saturation.
|
||||
ADDUS,
|
||||
SUBUS,
|
||||
// Integer add/sub with signed saturation.
|
||||
ADDS,
|
||||
SUBS,
|
||||
|
||||
/// Integer horizontal add.
|
||||
HADD,
|
||||
|
@ -3089,6 +3089,14 @@ defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
|
||||
SSE_INTALU_ITINS_P, 1>;
|
||||
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
|
||||
SSE_INTALU_ITINS_P, 0>;
|
||||
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
|
||||
SSE_INTALU_ITINS_P, HasBWI, 1>;
|
||||
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
|
||||
SSE_INTALU_ITINS_P, HasBWI, 0>;
|
||||
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
|
||||
SSE_INTALU_ITINS_P, HasBWI, 1>;
|
||||
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
|
||||
SSE_INTALU_ITINS_P, HasBWI, 0>;
|
||||
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul,
|
||||
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
|
||||
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
|
||||
|
@ -173,7 +173,10 @@ def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
|
||||
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
|
||||
SDTCisVec<1>,
|
||||
SDTCisSameAs<2, 1>]>;
|
||||
def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp>;
|
||||
def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
|
||||
def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp>;
|
||||
def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
|
||||
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
|
||||
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
|
||||
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
|
||||
|
@ -377,6 +377,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_w_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_w_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padd_w_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padds_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padds_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_padds_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_paddus_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_paddus_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_paddus_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pand_d_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pand_d_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
|
||||
@ -470,6 +482,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_w_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_w_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psub_w_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubs_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubs_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubus_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubus_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubus_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pxor_d_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pxor_d_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
|
||||
|
@ -599,4 +599,192 @@ define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %pt
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rr_512
|
||||
;CHECK: vpaddsw %zmm1, %zmm0, %zmm0
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rrk_512
|
||||
;CHECK: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rrkz_512
|
||||
;CHECK: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rm_512
|
||||
;CHECK: vpaddsw (%rdi), %zmm0, %zmm0
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rmk_512
|
||||
;CHECK: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rmkz_512
|
||||
;CHECK: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rr_512
|
||||
;CHECK: vpsubsw %zmm1, %zmm0, %zmm0
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rrk_512
|
||||
;CHECK: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rrkz_512
|
||||
;CHECK: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rm_512
|
||||
;CHECK: vpsubsw (%rdi), %zmm0, %zmm0
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rmk_512
|
||||
;CHECK: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rmkz_512
|
||||
;CHECK: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rr_512
|
||||
;CHECK: vpaddusw %zmm1, %zmm0, %zmm0
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rrk_512
|
||||
;CHECK: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rrkz_512
|
||||
;CHECK: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rm_512
|
||||
;CHECK: vpaddusw (%rdi), %zmm0, %zmm0
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rmk_512
|
||||
;CHECK: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rmkz_512
|
||||
;CHECK: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rr_512
|
||||
;CHECK: vpsubusw %zmm1, %zmm0, %zmm0
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rrk_512
|
||||
;CHECK: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rrkz_512
|
||||
;CHECK: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rm_512
|
||||
;CHECK: vpsubusw (%rdi), %zmm0, %zmm0
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rmk_512
|
||||
;CHECK: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rmkz_512
|
||||
;CHECK: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
@ -1917,3 +1917,754 @@ define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %pt
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
|
||||
|
||||
define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rr_128
|
||||
;CHECK: vpaddsw %xmm1, %xmm0, %xmm0
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rrk_128
|
||||
;CHECK: vpaddsw %xmm1, %xmm0, %xmm2 {%k1}
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rrkz_128
|
||||
;CHECK: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rm_128
|
||||
;CHECK: vpaddsw (%rdi), %xmm0, %xmm0
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rmk_128
|
||||
;CHECK: vpaddsw (%rdi), %xmm0, %xmm1 {%k1}
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rmkz_128
|
||||
;CHECK: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z}
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rr_256
|
||||
;CHECK: vpaddsw %ymm1, %ymm0, %ymm0
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rrk_256
|
||||
;CHECK: vpaddsw %ymm1, %ymm0, %ymm2 {%k1}
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rrkz_256
|
||||
;CHECK: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z}
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rm_256
|
||||
;CHECK: vpaddsw (%rdi), %ymm0, %ymm0
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rmk_256
|
||||
;CHECK: vpaddsw (%rdi), %ymm0, %ymm1 {%k1}
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi16_rmkz_256
|
||||
;CHECK: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z}
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rr_128
|
||||
;CHECK: vpsubsw %xmm1, %xmm0, %xmm0
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rrk_128
|
||||
;CHECK: vpsubsw %xmm1, %xmm0, %xmm2 {%k1}
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rrkz_128
|
||||
;CHECK: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rm_128
|
||||
;CHECK: vpsubsw (%rdi), %xmm0, %xmm0
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rmk_128
|
||||
;CHECK: vpsubsw (%rdi), %xmm0, %xmm1 {%k1}
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rmkz_128
|
||||
;CHECK: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z}
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rr_256
|
||||
;CHECK: vpsubsw %ymm1, %ymm0, %ymm0
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rrk_256
|
||||
;CHECK: vpsubsw %ymm1, %ymm0, %ymm2 {%k1}
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rrkz_256
|
||||
;CHECK: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z}
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rm_256
|
||||
;CHECK: vpsubsw (%rdi), %ymm0, %ymm0
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rmk_256
|
||||
;CHECK: vpsubsw (%rdi), %ymm0, %ymm1 {%k1}
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi16_rmkz_256
|
||||
;CHECK: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z}
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
; Unmasked register-register case for paddus.w.128: the intrinsic is called with
; an all-ones mask (i8 -1) and a zeroinitializer pass-through; the expected
; assembly is plain vpaddusw on xmm registers with no mask annotation.
define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rr_128
|
||||
;CHECK: vpaddusw %xmm1, %xmm0, %xmm0
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Masked register-register case for paddus.w.128: %passThru and a variable i8
; %mask are forwarded to the intrinsic; the expected assembly targets %xmm2
; under {%k1} (presumably the register holding %passThru -- confirm).
define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rrk_128
|
||||
;CHECK: vpaddusw %xmm1, %xmm0, %xmm2 {%k1}
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Zero-masked register-register case for paddus.w.128: variable i8 %mask with a
; zeroinitializer pass-through; the expected assembly carries {%k1} {z}.
define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rrkz_128
|
||||
;CHECK: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Unmasked register-memory case for paddus.w.128: %b is loaded from %ptr_b and
; the mask is all-ones (i8 -1); the expected assembly uses (%rdi) directly as
; the source operand of vpaddusw.
define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rm_128
|
||||
;CHECK: vpaddusw (%rdi), %xmm0, %xmm0
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Masked register-memory case for paddus.w.128: %b is loaded from %ptr_b, and
; %passThru plus a variable %mask are forwarded; the expected assembly reads
; (%rdi) and targets %xmm1 under {%k1}.
define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rmk_128
|
||||
;CHECK: vpaddusw (%rdi), %xmm0, %xmm1 {%k1}
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Zero-masked register-memory case for paddus.w.128: %b is loaded from %ptr_b,
; the pass-through is zeroinitializer and the mask is variable; the expected
; assembly reads (%rdi) and carries {%k1} {z}.
define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rmkz_128
|
||||
;CHECK: vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z}
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
; Unmasked register-register case for paddus.w.256: all-ones mask (i16 -1) and a
; zeroinitializer pass-through; the expected assembly is plain vpaddusw on ymm
; registers with no mask annotation.
define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rr_256
|
||||
;CHECK: vpaddusw %ymm1, %ymm0, %ymm0
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Masked register-register case for paddus.w.256: %passThru and a variable i16
; %mask are forwarded to the intrinsic; the expected assembly targets %ymm2
; under {%k1} (presumably the register holding %passThru -- confirm).
define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rrk_256
|
||||
;CHECK: vpaddusw %ymm1, %ymm0, %ymm2 {%k1}
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Zero-masked register-register case for paddus.w.256: variable i16 %mask with a
; zeroinitializer pass-through; the expected assembly carries {%k1} {z}.
define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rrkz_256
|
||||
;CHECK: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z}
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Unmasked register-memory case for paddus.w.256: %b is loaded from %ptr_b and
; the mask is all-ones (i16 -1); the expected assembly uses (%rdi) directly as
; the source operand of vpaddusw.
define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rm_256
|
||||
;CHECK: vpaddusw (%rdi), %ymm0, %ymm0
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Masked register-memory case for paddus.w.256: %b is loaded from %ptr_b, and
; %passThru plus a variable %mask are forwarded; the expected assembly reads
; (%rdi) and targets %ymm1 under {%k1}.
define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rmk_256
|
||||
;CHECK: vpaddusw (%rdi), %ymm0, %ymm1 {%k1}
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Zero-masked register-memory case for paddus.w.256: %b is loaded from %ptr_b,
; the pass-through is zeroinitializer and the mask is variable; the expected
; assembly reads (%rdi) and carries {%k1} {z}.
define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu16_rmkz_256
|
||||
;CHECK: vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z}
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
; Unmasked register-register case for psubus.w.128: all-ones mask (i8 -1) and a
; zeroinitializer pass-through; the expected assembly is plain vpsubusw on xmm
; registers with no mask annotation.
define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rr_128
|
||||
;CHECK: vpsubusw %xmm1, %xmm0, %xmm0
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Masked register-register case for psubus.w.128: %passThru and a variable i8
; %mask are forwarded to the intrinsic; the expected assembly targets %xmm2
; under {%k1} (presumably the register holding %passThru -- confirm).
define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rrk_128
|
||||
;CHECK: vpsubusw %xmm1, %xmm0, %xmm2 {%k1}
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Zero-masked register-register case for psubus.w.128: variable i8 %mask with a
; zeroinitializer pass-through; the expected assembly carries {%k1} {z}.
define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rrkz_128
|
||||
;CHECK: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Unmasked register-memory case for psubus.w.128: %b is loaded from %ptr_b and
; the mask is all-ones (i8 -1); the expected assembly uses (%rdi) directly as
; the source operand of vpsubusw.
define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rm_128
|
||||
;CHECK: vpsubusw (%rdi), %xmm0, %xmm0
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Masked register-memory case for psubus.w.128: %b is loaded from %ptr_b, and
; %passThru plus a variable %mask are forwarded; the expected assembly reads
; (%rdi) and targets %xmm1 under {%k1}.
define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rmk_128
|
||||
;CHECK: vpsubusw (%rdi), %xmm0, %xmm1 {%k1}
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Zero-masked register-memory case for psubus.w.128: %b is loaded from %ptr_b,
; the pass-through is zeroinitializer and the mask is variable; the expected
; assembly reads (%rdi) and carries {%k1} {z}.
define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rmkz_128
|
||||
;CHECK: vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z}
|
||||
%b = load <8 x i16>, <8 x i16>* %ptr_b
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
; Unmasked register-register case for psubus.w.256: all-ones mask (i16 -1) and a
; zeroinitializer pass-through; the expected assembly is plain vpsubusw on ymm
; registers with no mask annotation.
define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rr_256
|
||||
;CHECK: vpsubusw %ymm1, %ymm0, %ymm0
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Masked register-register case for psubus.w.256: %passThru and a variable i16
; %mask are forwarded to the intrinsic; the expected assembly targets %ymm2
; under {%k1} (presumably the register holding %passThru -- confirm).
define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rrk_256
|
||||
;CHECK: vpsubusw %ymm1, %ymm0, %ymm2 {%k1}
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Zero-masked register-register case for psubus.w.256: variable i16 %mask with a
; zeroinitializer pass-through; the expected assembly carries {%k1} {z}.
define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rrkz_256
|
||||
;CHECK: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z}
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Unmasked register-memory case for psubus.w.256: %b is loaded from %ptr_b and
; the mask is all-ones (i16 -1); the expected assembly uses (%rdi) directly as
; the source operand of vpsubusw.
define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rm_256
|
||||
;CHECK: vpsubusw (%rdi), %ymm0, %ymm0
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Masked register-memory case for psubus.w.256: %b is loaded from %ptr_b, and
; %passThru plus a variable %mask are forwarded; the expected assembly reads
; (%rdi) and targets %ymm1 under {%k1}.
define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rmk_256
|
||||
;CHECK: vpsubusw (%rdi), %ymm0, %ymm1 {%k1}
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
; Zero-masked register-memory case for psubus.w.256: %b is loaded from %ptr_b,
; the pass-through is zeroinitializer and the mask is variable; the expected
; assembly reads (%rdi) and carries {%k1} {z}.
define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu16_rmkz_256
|
||||
;CHECK: vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z}
|
||||
%b = load <16 x i16>, <16 x i16>* %ptr_b
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
|
||||
;CHECK-LABEL: test_mask_adds_epi8_rr_128
|
||||
;CHECK: vpaddsb %xmm1, %xmm0, %xmm0
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi8_rrk_128
|
||||
;CHECK: vpaddsb %xmm1, %xmm0, %xmm2 {%k1}
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi8_rrkz_128
|
||||
;CHECK: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_adds_epi8_rm_128
|
||||
;CHECK: vpaddsb (%rdi), %xmm0, %xmm0
|
||||
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi8_rmk_128
|
||||
;CHECK: vpaddsb (%rdi), %xmm0, %xmm1 {%k1}
|
||||
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi8_rmkz_128
|
||||
;CHECK: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z}
|
||||
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
|
||||
define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
|
||||
;CHECK-LABEL: test_mask_adds_epi8_rr_256
|
||||
;CHECK: vpaddsb %ymm1, %ymm0, %ymm0
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi8_rrk_256
|
||||
;CHECK: vpaddsb %ymm1, %ymm0, %ymm2 {%k1}
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi8_rrkz_256
|
||||
;CHECK: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z}
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_adds_epi8_rm_256
|
||||
;CHECK: vpaddsb (%rdi), %ymm0, %ymm0
|
||||
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi8_rmk_256
|
||||
;CHECK: vpaddsb (%rdi), %ymm0, %ymm1 {%k1}
|
||||
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epi8_rmkz_256
|
||||
;CHECK: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z}
|
||||
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
|
||||
define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
|
||||
;CHECK-LABEL: test_mask_subs_epi8_rr_128
|
||||
;CHECK: vpsubsb %xmm1, %xmm0, %xmm0
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi8_rrk_128
|
||||
;CHECK: vpsubsb %xmm1, %xmm0, %xmm2 {%k1}
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi8_rrkz_128
|
||||
;CHECK: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_subs_epi8_rm_128
|
||||
;CHECK: vpsubsb (%rdi), %xmm0, %xmm0
|
||||
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi8_rmk_128
|
||||
;CHECK: vpsubsb (%rdi), %xmm0, %xmm1 {%k1}
|
||||
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi8_rmkz_128
|
||||
;CHECK: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z}
|
||||
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
|
||||
define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
|
||||
;CHECK-LABEL: test_mask_subs_epi8_rr_256
|
||||
;CHECK: vpsubsb %ymm1, %ymm0, %ymm0
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi8_rrk_256
|
||||
;CHECK: vpsubsb %ymm1, %ymm0, %ymm2 {%k1}
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi8_rrkz_256
|
||||
;CHECK: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z}
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_subs_epi8_rm_256
|
||||
;CHECK: vpsubsb (%rdi), %ymm0, %ymm0
|
||||
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi8_rmk_256
|
||||
;CHECK: vpsubsb (%rdi), %ymm0, %ymm1 {%k1}
|
||||
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epi8_rmkz_256
|
||||
;CHECK: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z}
|
||||
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
|
||||
define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
|
||||
;CHECK-LABEL: test_mask_adds_epu8_rr_128
|
||||
;CHECK: vpaddusb %xmm1, %xmm0, %xmm0
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu8_rrk_128
|
||||
;CHECK: vpaddusb %xmm1, %xmm0, %xmm2 {%k1}
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu8_rrkz_128
|
||||
;CHECK: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_adds_epu8_rm_128
|
||||
;CHECK: vpaddusb (%rdi), %xmm0, %xmm0
|
||||
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu8_rmk_128
|
||||
;CHECK: vpaddusb (%rdi), %xmm0, %xmm1 {%k1}
|
||||
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu8_rmkz_128
|
||||
;CHECK: vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z}
|
||||
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
|
||||
define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
|
||||
;CHECK-LABEL: test_mask_adds_epu8_rr_256
|
||||
;CHECK: vpaddusb %ymm1, %ymm0, %ymm0
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu8_rrk_256
|
||||
;CHECK: vpaddusb %ymm1, %ymm0, %ymm2 {%k1}
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu8_rrkz_256
|
||||
;CHECK: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z}
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_adds_epu8_rm_256
|
||||
;CHECK: vpaddusb (%rdi), %ymm0, %ymm0
|
||||
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu8_rmk_256
|
||||
;CHECK: vpaddusb (%rdi), %ymm0, %ymm1 {%k1}
|
||||
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_adds_epu8_rmkz_256
|
||||
;CHECK: vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z}
|
||||
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
|
||||
define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
|
||||
;CHECK-LABEL: test_mask_subs_epu8_rr_128
|
||||
;CHECK: vpsubusb %xmm1, %xmm0, %xmm0
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu8_rrk_128
|
||||
;CHECK: vpsubusb %xmm1, %xmm0, %xmm2 {%k1}
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu8_rrkz_128
|
||||
;CHECK: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_subs_epu8_rm_128
|
||||
;CHECK: vpsubusb (%rdi), %xmm0, %xmm0
|
||||
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu8_rmk_128
|
||||
;CHECK: vpsubusb (%rdi), %xmm0, %xmm1 {%k1}
|
||||
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu8_rmkz_128
|
||||
;CHECK: vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z}
|
||||
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
|
||||
define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
|
||||
;CHECK-LABEL: test_mask_subs_epu8_rr_256
|
||||
;CHECK: vpsubusb %ymm1, %ymm0, %ymm0
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu8_rrk_256
|
||||
;CHECK: vpsubusb %ymm1, %ymm0, %ymm2 {%k1}
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu8_rrkz_256
|
||||
;CHECK: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z}
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
|
||||
;CHECK-LABEL: test_mask_subs_epu8_rm_256
|
||||
;CHECK: vpsubusb (%rdi), %ymm0, %ymm0
|
||||
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu8_rmk_256
|
||||
;CHECK: vpsubusb (%rdi), %ymm0, %ymm1 {%k1}
|
||||
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
|
||||
;CHECK-LABEL: test_mask_subs_epu8_rmkz_256
|
||||
;CHECK: vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z}
|
||||
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
@ -2335,3 +2335,904 @@
|
||||
// CHECK: vpackuswb -8256(%rdx), %zmm18, %zmm23
|
||||
// CHECK: encoding: [0x62,0xe1,0xed,0x40,0x67,0xba,0xc0,0xdf,0xff,0xff]
|
||||
vpackuswb -8256(%rdx), %zmm18, %zmm23
|
||||
|
||||
// vpaddsb, 512-bit (zmm) forms. Each case lists the expected printed
// instruction, the expected encoding bytes, and then the instruction to assemble.
// Register-register, no write-mask.
// CHECK: vpaddsb %zmm20, %zmm19, %zmm19
|
||||
// CHECK: encoding: [0x62,0xa1,0x65,0x40,0xec,0xdc]
|
||||
vpaddsb %zmm20, %zmm19, %zmm19
|
||||
|
||||
// With write-mask {%k6}: only the fourth encoding byte differs (0x40 -> 0x46).
// CHECK: vpaddsb %zmm20, %zmm19, %zmm19 {%k6}
|
||||
// CHECK: encoding: [0x62,0xa1,0x65,0x46,0xec,0xdc]
|
||||
vpaddsb %zmm20, %zmm19, %zmm19 {%k6}
|
||||
|
||||
// Adding {z} changes that same byte to 0xc6.
// CHECK: vpaddsb %zmm20, %zmm19, %zmm19 {%k6} {z}
|
||||
// CHECK: encoding: [0x62,0xa1,0x65,0xc6,0xec,0xdc]
|
||||
vpaddsb %zmm20, %zmm19, %zmm19 {%k6} {z}
|
||||
|
||||
// Memory-source forms: plain base register, then base + index*8 + displacement.
// CHECK: vpaddsb (%rcx), %zmm19, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xec,0x19]
|
||||
vpaddsb (%rcx), %zmm19, %zmm19
|
||||
|
||||
// CHECK: vpaddsb 291(%rax,%r14,8), %zmm19, %zmm19
|
||||
// CHECK: encoding: [0x62,0xa1,0x65,0x40,0xec,0x9c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpaddsb 291(%rax,%r14,8), %zmm19, %zmm19
|
||||
|
||||
// Displacement boundary cases: 8128 and -8192 are expected to encode with a
// single displacement byte (0x7f / 0x80), while 8192 and -8256 are expected to
// use a four-byte displacement.
// NOTE(review): consistent with an 8-bit displacement scaled by the 64-byte
// operand size (8128 = 127*64, -8192 = -128*64) -- confirm against the Intel SDM.
// CHECK: vpaddsb 8128(%rdx), %zmm19, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xec,0x5a,0x7f]
|
||||
vpaddsb 8128(%rdx), %zmm19, %zmm19
|
||||
|
||||
// CHECK: vpaddsb 8192(%rdx), %zmm19, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xec,0x9a,0x00,0x20,0x00,0x00]
|
||||
vpaddsb 8192(%rdx), %zmm19, %zmm19
|
||||
|
||||
// CHECK: vpaddsb -8192(%rdx), %zmm19, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xec,0x5a,0x80]
|
||||
vpaddsb -8192(%rdx), %zmm19, %zmm19
|
||||
|
||||
// CHECK: vpaddsb -8256(%rdx), %zmm19, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xec,0x9a,0xc0,0xdf,0xff,0xff]
vpaddsb -8256(%rdx), %zmm19, %zmm19
|
||||
|
||||
// vpaddsw, 512-bit (zmm) forms. Each case lists the expected printed
// instruction, the expected encoding bytes, and then the instruction to assemble.
// Register-register, no write-mask.
// CHECK: vpaddsw %zmm22, %zmm19, %zmm20
|
||||
// CHECK: encoding: [0x62,0xa1,0x65,0x40,0xed,0xe6]
|
||||
vpaddsw %zmm22, %zmm19, %zmm20
|
||||
|
||||
// With write-mask {%k2}: only the fourth encoding byte differs (0x40 -> 0x42).
// CHECK: vpaddsw %zmm22, %zmm19, %zmm20 {%k2}
|
||||
// CHECK: encoding: [0x62,0xa1,0x65,0x42,0xed,0xe6]
|
||||
vpaddsw %zmm22, %zmm19, %zmm20 {%k2}
|
||||
|
||||
// Adding {z} changes that same byte to 0xc2.
// CHECK: vpaddsw %zmm22, %zmm19, %zmm20 {%k2} {z}
|
||||
// CHECK: encoding: [0x62,0xa1,0x65,0xc2,0xed,0xe6]
|
||||
vpaddsw %zmm22, %zmm19, %zmm20 {%k2} {z}
|
||||
|
||||
// Memory-source forms: plain base register, then base + index*8 + displacement.
// CHECK: vpaddsw (%rcx), %zmm19, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xed,0x21]
|
||||
vpaddsw (%rcx), %zmm19, %zmm20
|
||||
|
||||
// CHECK: vpaddsw 291(%rax,%r14,8), %zmm19, %zmm20
|
||||
// CHECK: encoding: [0x62,0xa1,0x65,0x40,0xed,0xa4,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpaddsw 291(%rax,%r14,8), %zmm19, %zmm20
|
||||
|
||||
// Displacement boundary cases: 8128 and -8192 are expected to encode with a
// single displacement byte (0x7f / 0x80), while 8192 and -8256 are expected to
// use a four-byte displacement.
// NOTE(review): consistent with an 8-bit displacement scaled by the 64-byte
// operand size (8128 = 127*64, -8192 = -128*64) -- confirm against the Intel SDM.
// CHECK: vpaddsw 8128(%rdx), %zmm19, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xed,0x62,0x7f]
|
||||
vpaddsw 8128(%rdx), %zmm19, %zmm20
|
||||
|
||||
// CHECK: vpaddsw 8192(%rdx), %zmm19, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xed,0xa2,0x00,0x20,0x00,0x00]
|
||||
vpaddsw 8192(%rdx), %zmm19, %zmm20
|
||||
|
||||
// CHECK: vpaddsw -8192(%rdx), %zmm19, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xed,0x62,0x80]
|
||||
vpaddsw -8192(%rdx), %zmm19, %zmm20
|
||||
|
||||
// CHECK: vpaddsw -8256(%rdx), %zmm19, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x65,0x40,0xed,0xa2,0xc0,0xdf,0xff,0xff]
vpaddsw -8256(%rdx), %zmm19, %zmm20
|
||||
|
||||
// CHECK: vpaddusb %zmm25, %zmm29, %zmm29
|
||||
// CHECK: encoding: [0x62,0x01,0x15,0x40,0xdc,0xe9]
|
||||
vpaddusb %zmm25, %zmm29, %zmm29
|
||||
|
||||
// CHECK: vpaddusb %zmm25, %zmm29, %zmm29 {%k1}
|
||||
// CHECK: encoding: [0x62,0x01,0x15,0x41,0xdc,0xe9]
|
||||
vpaddusb %zmm25, %zmm29, %zmm29 {%k1}
|
||||
|
||||
// CHECK: vpaddusb %zmm25, %zmm29, %zmm29 {%k1} {z}
|
||||
// CHECK: encoding: [0x62,0x01,0x15,0xc1,0xdc,0xe9]
|
||||
vpaddusb %zmm25, %zmm29, %zmm29 {%k1} {z}
|
||||
|
||||
// CHECK: vpaddusb (%rcx), %zmm29, %zmm29
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xdc,0x29]
|
||||
vpaddusb (%rcx), %zmm29, %zmm29
|
||||
|
||||
// CHECK: vpaddusb 291(%rax,%r14,8), %zmm29, %zmm29
|
||||
// CHECK: encoding: [0x62,0x21,0x15,0x40,0xdc,0xac,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpaddusb 291(%rax,%r14,8), %zmm29, %zmm29
|
||||
|
||||
// CHECK: vpaddusb 8128(%rdx), %zmm29, %zmm29
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xdc,0x6a,0x7f]
|
||||
vpaddusb 8128(%rdx), %zmm29, %zmm29
|
||||
|
||||
// CHECK: vpaddusb 8192(%rdx), %zmm29, %zmm29
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xdc,0xaa,0x00,0x20,0x00,0x00]
|
||||
vpaddusb 8192(%rdx), %zmm29, %zmm29
|
||||
|
||||
// CHECK: vpaddusb -8192(%rdx), %zmm29, %zmm29
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xdc,0x6a,0x80]
|
||||
vpaddusb -8192(%rdx), %zmm29, %zmm29
|
||||
|
||||
// CHECK: vpaddusb -8256(%rdx), %zmm29, %zmm29
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xdc,0xaa,0xc0,0xdf,0xff,0xff]
|
||||
vpaddusb -8256(%rdx), %zmm29, %zmm29
|
||||
|
||||
// CHECK: vpaddusw %zmm17, %zmm25, %zmm21
|
||||
// CHECK: encoding: [0x62,0xa1,0x35,0x40,0xdd,0xe9]
|
||||
vpaddusw %zmm17, %zmm25, %zmm21
|
||||
|
||||
// CHECK: vpaddusw %zmm17, %zmm25, %zmm21 {%k4}
|
||||
// CHECK: encoding: [0x62,0xa1,0x35,0x44,0xdd,0xe9]
|
||||
vpaddusw %zmm17, %zmm25, %zmm21 {%k4}
|
||||
|
||||
// CHECK: vpaddusw %zmm17, %zmm25, %zmm21 {%k4} {z}
|
||||
// CHECK: encoding: [0x62,0xa1,0x35,0xc4,0xdd,0xe9]
|
||||
vpaddusw %zmm17, %zmm25, %zmm21 {%k4} {z}
|
||||
|
||||
// CHECK: vpaddusw (%rcx), %zmm25, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x35,0x40,0xdd,0x29]
|
||||
vpaddusw (%rcx), %zmm25, %zmm21
|
||||
|
||||
// CHECK: vpaddusw 291(%rax,%r14,8), %zmm25, %zmm21
|
||||
// CHECK: encoding: [0x62,0xa1,0x35,0x40,0xdd,0xac,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpaddusw 291(%rax,%r14,8), %zmm25, %zmm21
|
||||
|
||||
// CHECK: vpaddusw 8128(%rdx), %zmm25, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x35,0x40,0xdd,0x6a,0x7f]
|
||||
vpaddusw 8128(%rdx), %zmm25, %zmm21
|
||||
|
||||
// CHECK: vpaddusw 8192(%rdx), %zmm25, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x35,0x40,0xdd,0xaa,0x00,0x20,0x00,0x00]
|
||||
vpaddusw 8192(%rdx), %zmm25, %zmm21
|
||||
|
||||
// CHECK: vpaddusw -8192(%rdx), %zmm25, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x35,0x40,0xdd,0x6a,0x80]
|
||||
vpaddusw -8192(%rdx), %zmm25, %zmm21
|
||||
|
||||
// CHECK: vpaddusw -8256(%rdx), %zmm25, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x35,0x40,0xdd,0xaa,0xc0,0xdf,0xff,0xff]
|
||||
vpaddusw -8256(%rdx), %zmm25, %zmm21
|
||||
|
||||
// CHECK: vpsubsb %zmm20, %zmm24, %zmm25
|
||||
// CHECK: encoding: [0x62,0x21,0x3d,0x40,0xe8,0xcc]
|
||||
vpsubsb %zmm20, %zmm24, %zmm25
|
||||
|
||||
// CHECK: vpsubsb %zmm20, %zmm24, %zmm25 {%k2}
|
||||
// CHECK: encoding: [0x62,0x21,0x3d,0x42,0xe8,0xcc]
|
||||
vpsubsb %zmm20, %zmm24, %zmm25 {%k2}
|
||||
|
||||
// CHECK: vpsubsb %zmm20, %zmm24, %zmm25 {%k2} {z}
|
||||
// CHECK: encoding: [0x62,0x21,0x3d,0xc2,0xe8,0xcc]
|
||||
vpsubsb %zmm20, %zmm24, %zmm25 {%k2} {z}
|
||||
|
||||
// CHECK: vpsubsb (%rcx), %zmm24, %zmm25
|
||||
// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe8,0x09]
|
||||
vpsubsb (%rcx), %zmm24, %zmm25
|
||||
|
||||
// CHECK: vpsubsb 291(%rax,%r14,8), %zmm24, %zmm25
|
||||
// CHECK: encoding: [0x62,0x21,0x3d,0x40,0xe8,0x8c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpsubsb 291(%rax,%r14,8), %zmm24, %zmm25
|
||||
|
||||
// CHECK: vpsubsb 8128(%rdx), %zmm24, %zmm25
|
||||
// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe8,0x4a,0x7f]
|
||||
vpsubsb 8128(%rdx), %zmm24, %zmm25
|
||||
|
||||
// CHECK: vpsubsb 8192(%rdx), %zmm24, %zmm25
|
||||
// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe8,0x8a,0x00,0x20,0x00,0x00]
|
||||
vpsubsb 8192(%rdx), %zmm24, %zmm25
|
||||
|
||||
// CHECK: vpsubsb -8192(%rdx), %zmm24, %zmm25
|
||||
// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe8,0x4a,0x80]
|
||||
vpsubsb -8192(%rdx), %zmm24, %zmm25
|
||||
|
||||
// CHECK: vpsubsb -8256(%rdx), %zmm24, %zmm25
|
||||
// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe8,0x8a,0xc0,0xdf,0xff,0xff]
|
||||
vpsubsb -8256(%rdx), %zmm24, %zmm25
|
||||
|
||||
// CHECK: vpsubsw %zmm23, %zmm22, %zmm23
|
||||
// CHECK: encoding: [0x62,0xa1,0x4d,0x40,0xe9,0xff]
|
||||
vpsubsw %zmm23, %zmm22, %zmm23
|
||||
|
||||
// CHECK: vpsubsw %zmm23, %zmm22, %zmm23 {%k3}
|
||||
// CHECK: encoding: [0x62,0xa1,0x4d,0x43,0xe9,0xff]
|
||||
vpsubsw %zmm23, %zmm22, %zmm23 {%k3}
|
||||
|
||||
// CHECK: vpsubsw %zmm23, %zmm22, %zmm23 {%k3} {z}
|
||||
// CHECK: encoding: [0x62,0xa1,0x4d,0xc3,0xe9,0xff]
|
||||
vpsubsw %zmm23, %zmm22, %zmm23 {%k3} {z}
|
||||
|
||||
// CHECK: vpsubsw (%rcx), %zmm22, %zmm23
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xe9,0x39]
|
||||
vpsubsw (%rcx), %zmm22, %zmm23
|
||||
|
||||
// CHECK: vpsubsw 291(%rax,%r14,8), %zmm22, %zmm23
|
||||
// CHECK: encoding: [0x62,0xa1,0x4d,0x40,0xe9,0xbc,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpsubsw 291(%rax,%r14,8), %zmm22, %zmm23
|
||||
|
||||
// CHECK: vpsubsw 8128(%rdx), %zmm22, %zmm23
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xe9,0x7a,0x7f]
|
||||
vpsubsw 8128(%rdx), %zmm22, %zmm23
|
||||
|
||||
// CHECK: vpsubsw 8192(%rdx), %zmm22, %zmm23
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xe9,0xba,0x00,0x20,0x00,0x00]
|
||||
vpsubsw 8192(%rdx), %zmm22, %zmm23
|
||||
|
||||
// CHECK: vpsubsw -8192(%rdx), %zmm22, %zmm23
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xe9,0x7a,0x80]
|
||||
vpsubsw -8192(%rdx), %zmm22, %zmm23
|
||||
|
||||
// CHECK: vpsubsw -8256(%rdx), %zmm22, %zmm23
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xe9,0xba,0xc0,0xdf,0xff,0xff]
|
||||
vpsubsw -8256(%rdx), %zmm22, %zmm23
|
||||
|
||||
// CHECK: vpsubusb %zmm22, %zmm21, %zmm24
|
||||
// CHECK: encoding: [0x62,0x21,0x55,0x40,0xd8,0xc6]
|
||||
vpsubusb %zmm22, %zmm21, %zmm24
|
||||
|
||||
// CHECK: vpsubusb %zmm22, %zmm21, %zmm24 {%k4}
|
||||
// CHECK: encoding: [0x62,0x21,0x55,0x44,0xd8,0xc6]
|
||||
vpsubusb %zmm22, %zmm21, %zmm24 {%k4}
|
||||
|
||||
// CHECK: vpsubusb %zmm22, %zmm21, %zmm24 {%k4} {z}
|
||||
// CHECK: encoding: [0x62,0x21,0x55,0xc4,0xd8,0xc6]
|
||||
vpsubusb %zmm22, %zmm21, %zmm24 {%k4} {z}
|
||||
|
||||
// CHECK: vpsubusb (%rcx), %zmm21, %zmm24
|
||||
// CHECK: encoding: [0x62,0x61,0x55,0x40,0xd8,0x01]
|
||||
vpsubusb (%rcx), %zmm21, %zmm24
|
||||
|
||||
// CHECK: vpsubusb 291(%rax,%r14,8), %zmm21, %zmm24
|
||||
// CHECK: encoding: [0x62,0x21,0x55,0x40,0xd8,0x84,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpsubusb 291(%rax,%r14,8), %zmm21, %zmm24
|
||||
|
||||
// CHECK: vpsubusb 8128(%rdx), %zmm21, %zmm24
|
||||
// CHECK: encoding: [0x62,0x61,0x55,0x40,0xd8,0x42,0x7f]
|
||||
vpsubusb 8128(%rdx), %zmm21, %zmm24
|
||||
|
||||
// CHECK: vpsubusb 8192(%rdx), %zmm21, %zmm24
|
||||
// CHECK: encoding: [0x62,0x61,0x55,0x40,0xd8,0x82,0x00,0x20,0x00,0x00]
|
||||
vpsubusb 8192(%rdx), %zmm21, %zmm24
|
||||
|
||||
// CHECK: vpsubusb -8192(%rdx), %zmm21, %zmm24
|
||||
// CHECK: encoding: [0x62,0x61,0x55,0x40,0xd8,0x42,0x80]
|
||||
vpsubusb -8192(%rdx), %zmm21, %zmm24
|
||||
|
||||
// CHECK: vpsubusb -8256(%rdx), %zmm21, %zmm24
|
||||
// CHECK: encoding: [0x62,0x61,0x55,0x40,0xd8,0x82,0xc0,0xdf,0xff,0xff]
|
||||
vpsubusb -8256(%rdx), %zmm21, %zmm24
|
||||
|
||||
// CHECK: vpsubusw %zmm23, %zmm17, %zmm25
|
||||
// CHECK: encoding: [0x62,0x21,0x75,0x40,0xd9,0xcf]
|
||||
vpsubusw %zmm23, %zmm17, %zmm25
|
||||
|
||||
// CHECK: vpsubusw %zmm23, %zmm17, %zmm25 {%k1}
|
||||
// CHECK: encoding: [0x62,0x21,0x75,0x41,0xd9,0xcf]
|
||||
vpsubusw %zmm23, %zmm17, %zmm25 {%k1}
|
||||
|
||||
// CHECK: vpsubusw %zmm23, %zmm17, %zmm25 {%k1} {z}
|
||||
// CHECK: encoding: [0x62,0x21,0x75,0xc1,0xd9,0xcf]
|
||||
vpsubusw %zmm23, %zmm17, %zmm25 {%k1} {z}
|
||||
|
||||
// CHECK: vpsubusw (%rcx), %zmm17, %zmm25
|
||||
// CHECK: encoding: [0x62,0x61,0x75,0x40,0xd9,0x09]
|
||||
vpsubusw (%rcx), %zmm17, %zmm25
|
||||
|
||||
// CHECK: vpsubusw 291(%rax,%r14,8), %zmm17, %zmm25
|
||||
// CHECK: encoding: [0x62,0x21,0x75,0x40,0xd9,0x8c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpsubusw 291(%rax,%r14,8), %zmm17, %zmm25
|
||||
|
||||
// CHECK: vpsubusw 8128(%rdx), %zmm17, %zmm25
|
||||
// CHECK: encoding: [0x62,0x61,0x75,0x40,0xd9,0x4a,0x7f]
|
||||
vpsubusw 8128(%rdx), %zmm17, %zmm25
|
||||
|
||||
// CHECK: vpsubusw 8192(%rdx), %zmm17, %zmm25
|
||||
// CHECK: encoding: [0x62,0x61,0x75,0x40,0xd9,0x8a,0x00,0x20,0x00,0x00]
|
||||
vpsubusw 8192(%rdx), %zmm17, %zmm25
|
||||
|
||||
// CHECK: vpsubusw -8192(%rdx), %zmm17, %zmm25
|
||||
// CHECK: encoding: [0x62,0x61,0x75,0x40,0xd9,0x4a,0x80]
|
||||
vpsubusw -8192(%rdx), %zmm17, %zmm25
|
||||
|
||||
// CHECK: vpsubusw -8256(%rdx), %zmm17, %zmm25
|
||||
// CHECK: encoding: [0x62,0x61,0x75,0x40,0xd9,0x8a,0xc0,0xdf,0xff,0xff]
|
||||
vpsubusw -8256(%rdx), %zmm17, %zmm25
|
||||
|
||||
// CHECK: vpaddb %zmm26, %zmm29, %zmm21
|
||||
// CHECK: encoding: [0x62,0x81,0x15,0x40,0xfc,0xea]
|
||||
vpaddb %zmm26, %zmm29, %zmm21
|
||||
|
||||
// CHECK: vpaddb %zmm26, %zmm29, %zmm21 {%k5}
|
||||
// CHECK: encoding: [0x62,0x81,0x15,0x45,0xfc,0xea]
|
||||
vpaddb %zmm26, %zmm29, %zmm21 {%k5}
|
||||
|
||||
// CHECK: vpaddb %zmm26, %zmm29, %zmm21 {%k5} {z}
|
||||
// CHECK: encoding: [0x62,0x81,0x15,0xc5,0xfc,0xea]
|
||||
vpaddb %zmm26, %zmm29, %zmm21 {%k5} {z}
|
||||
|
||||
// CHECK: vpaddb (%rcx), %zmm29, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x15,0x40,0xfc,0x29]
|
||||
vpaddb (%rcx), %zmm29, %zmm21
|
||||
|
||||
// CHECK: vpaddb 4660(%rax,%r14,8), %zmm29, %zmm21
|
||||
// CHECK: encoding: [0x62,0xa1,0x15,0x40,0xfc,0xac,0xf0,0x34,0x12,0x00,0x00]
|
||||
vpaddb 4660(%rax,%r14,8), %zmm29, %zmm21
|
||||
|
||||
// CHECK: vpaddb 8128(%rdx), %zmm29, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x15,0x40,0xfc,0x6a,0x7f]
|
||||
vpaddb 8128(%rdx), %zmm29, %zmm21
|
||||
|
||||
// CHECK: vpaddb 8192(%rdx), %zmm29, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x15,0x40,0xfc,0xaa,0x00,0x20,0x00,0x00]
|
||||
vpaddb 8192(%rdx), %zmm29, %zmm21
|
||||
|
||||
// CHECK: vpaddb -8192(%rdx), %zmm29, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x15,0x40,0xfc,0x6a,0x80]
|
||||
vpaddb -8192(%rdx), %zmm29, %zmm21
|
||||
|
||||
// CHECK: vpaddb -8256(%rdx), %zmm29, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x15,0x40,0xfc,0xaa,0xc0,0xdf,0xff,0xff]
|
||||
vpaddb -8256(%rdx), %zmm29, %zmm21
|
||||
|
||||
// CHECK: vpaddsb %zmm19, %zmm24, %zmm18
|
||||
// CHECK: encoding: [0x62,0xa1,0x3d,0x40,0xec,0xd3]
|
||||
vpaddsb %zmm19, %zmm24, %zmm18
|
||||
|
||||
// CHECK: vpaddsb %zmm19, %zmm24, %zmm18 {%k1}
|
||||
// CHECK: encoding: [0x62,0xa1,0x3d,0x41,0xec,0xd3]
|
||||
vpaddsb %zmm19, %zmm24, %zmm18 {%k1}
|
||||
|
||||
// CHECK: vpaddsb %zmm19, %zmm24, %zmm18 {%k1} {z}
|
||||
// CHECK: encoding: [0x62,0xa1,0x3d,0xc1,0xec,0xd3]
|
||||
vpaddsb %zmm19, %zmm24, %zmm18 {%k1} {z}
|
||||
|
||||
// CHECK: vpaddsb (%rcx), %zmm24, %zmm18
|
||||
// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0xec,0x11]
|
||||
vpaddsb (%rcx), %zmm24, %zmm18
|
||||
|
||||
// CHECK: vpaddsb 4660(%rax,%r14,8), %zmm24, %zmm18
|
||||
// CHECK: encoding: [0x62,0xa1,0x3d,0x40,0xec,0x94,0xf0,0x34,0x12,0x00,0x00]
|
||||
vpaddsb 4660(%rax,%r14,8), %zmm24, %zmm18
|
||||
|
||||
// CHECK: vpaddsb 8128(%rdx), %zmm24, %zmm18
|
||||
// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0xec,0x52,0x7f]
|
||||
vpaddsb 8128(%rdx), %zmm24, %zmm18
|
||||
|
||||
// CHECK: vpaddsb 8192(%rdx), %zmm24, %zmm18
|
||||
// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0xec,0x92,0x00,0x20,0x00,0x00]
|
||||
vpaddsb 8192(%rdx), %zmm24, %zmm18
|
||||
|
||||
// CHECK: vpaddsb -8192(%rdx), %zmm24, %zmm18
|
||||
// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0xec,0x52,0x80]
|
||||
vpaddsb -8192(%rdx), %zmm24, %zmm18
|
||||
|
||||
// CHECK: vpaddsb -8256(%rdx), %zmm24, %zmm18
|
||||
// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0xec,0x92,0xc0,0xdf,0xff,0xff]
|
||||
vpaddsb -8256(%rdx), %zmm24, %zmm18
|
||||
|
||||
// CHECK: vpaddsw %zmm28, %zmm17, %zmm20
|
||||
// CHECK: encoding: [0x62,0x81,0x75,0x40,0xed,0xe4]
|
||||
vpaddsw %zmm28, %zmm17, %zmm20
|
||||
|
||||
// CHECK: vpaddsw %zmm28, %zmm17, %zmm20 {%k2}
|
||||
// CHECK: encoding: [0x62,0x81,0x75,0x42,0xed,0xe4]
|
||||
vpaddsw %zmm28, %zmm17, %zmm20 {%k2}
|
||||
|
||||
// CHECK: vpaddsw %zmm28, %zmm17, %zmm20 {%k2} {z}
|
||||
// CHECK: encoding: [0x62,0x81,0x75,0xc2,0xed,0xe4]
|
||||
vpaddsw %zmm28, %zmm17, %zmm20 {%k2} {z}
|
||||
|
||||
// CHECK: vpaddsw (%rcx), %zmm17, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x75,0x40,0xed,0x21]
|
||||
vpaddsw (%rcx), %zmm17, %zmm20
|
||||
|
||||
// CHECK: vpaddsw 4660(%rax,%r14,8), %zmm17, %zmm20
|
||||
// CHECK: encoding: [0x62,0xa1,0x75,0x40,0xed,0xa4,0xf0,0x34,0x12,0x00,0x00]
|
||||
vpaddsw 4660(%rax,%r14,8), %zmm17, %zmm20
|
||||
|
||||
// CHECK: vpaddsw 8128(%rdx), %zmm17, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x75,0x40,0xed,0x62,0x7f]
|
||||
vpaddsw 8128(%rdx), %zmm17, %zmm20
|
||||
|
||||
// CHECK: vpaddsw 8192(%rdx), %zmm17, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x75,0x40,0xed,0xa2,0x00,0x20,0x00,0x00]
|
||||
vpaddsw 8192(%rdx), %zmm17, %zmm20
|
||||
|
||||
// CHECK: vpaddsw -8192(%rdx), %zmm17, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x75,0x40,0xed,0x62,0x80]
|
||||
vpaddsw -8192(%rdx), %zmm17, %zmm20
|
||||
|
||||
// CHECK: vpaddsw -8256(%rdx), %zmm17, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x75,0x40,0xed,0xa2,0xc0,0xdf,0xff,0xff]
|
||||
vpaddsw -8256(%rdx), %zmm17, %zmm20
|
||||
|
||||
// CHECK: vpaddusb %zmm22, %zmm27, %zmm19
|
||||
// CHECK: encoding: [0x62,0xa1,0x25,0x40,0xdc,0xde]
|
||||
vpaddusb %zmm22, %zmm27, %zmm19
|
||||
|
||||
// CHECK: vpaddusb %zmm22, %zmm27, %zmm19 {%k7}
|
||||
// CHECK: encoding: [0x62,0xa1,0x25,0x47,0xdc,0xde]
|
||||
vpaddusb %zmm22, %zmm27, %zmm19 {%k7}
|
||||
|
||||
// CHECK: vpaddusb %zmm22, %zmm27, %zmm19 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xa1,0x25,0xc7,0xdc,0xde]
|
||||
vpaddusb %zmm22, %zmm27, %zmm19 {%k7} {z}
|
||||
|
||||
// CHECK: vpaddusb (%rcx), %zmm27, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x25,0x40,0xdc,0x19]
|
||||
vpaddusb (%rcx), %zmm27, %zmm19
|
||||
|
||||
// CHECK: vpaddusb 4660(%rax,%r14,8), %zmm27, %zmm19
|
||||
// CHECK: encoding: [0x62,0xa1,0x25,0x40,0xdc,0x9c,0xf0,0x34,0x12,0x00,0x00]
|
||||
vpaddusb 4660(%rax,%r14,8), %zmm27, %zmm19
|
||||
|
||||
// CHECK: vpaddusb 8128(%rdx), %zmm27, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x25,0x40,0xdc,0x5a,0x7f]
|
||||
vpaddusb 8128(%rdx), %zmm27, %zmm19
|
||||
|
||||
// CHECK: vpaddusb 8192(%rdx), %zmm27, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x25,0x40,0xdc,0x9a,0x00,0x20,0x00,0x00]
|
||||
vpaddusb 8192(%rdx), %zmm27, %zmm19
|
||||
|
||||
// CHECK: vpaddusb -8192(%rdx), %zmm27, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x25,0x40,0xdc,0x5a,0x80]
|
||||
vpaddusb -8192(%rdx), %zmm27, %zmm19
|
||||
|
||||
// CHECK: vpaddusb -8256(%rdx), %zmm27, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x25,0x40,0xdc,0x9a,0xc0,0xdf,0xff,0xff]
|
||||
vpaddusb -8256(%rdx), %zmm27, %zmm19
|
||||
|
||||
// CHECK: vpaddusw %zmm23, %zmm23, %zmm27
|
||||
// CHECK: encoding: [0x62,0x21,0x45,0x40,0xdd,0xdf]
|
||||
vpaddusw %zmm23, %zmm23, %zmm27
|
||||
|
||||
// CHECK: vpaddusw %zmm23, %zmm23, %zmm27 {%k7}
|
||||
// CHECK: encoding: [0x62,0x21,0x45,0x47,0xdd,0xdf]
|
||||
vpaddusw %zmm23, %zmm23, %zmm27 {%k7}
|
||||
|
||||
// CHECK: vpaddusw %zmm23, %zmm23, %zmm27 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x21,0x45,0xc7,0xdd,0xdf]
|
||||
vpaddusw %zmm23, %zmm23, %zmm27 {%k7} {z}
|
||||
|
||||
// CHECK: vpaddusw (%rcx), %zmm23, %zmm27
|
||||
// CHECK: encoding: [0x62,0x61,0x45,0x40,0xdd,0x19]
|
||||
vpaddusw (%rcx), %zmm23, %zmm27
|
||||
|
||||
// CHECK: vpaddusw 4660(%rax,%r14,8), %zmm23, %zmm27
|
||||
// CHECK: encoding: [0x62,0x21,0x45,0x40,0xdd,0x9c,0xf0,0x34,0x12,0x00,0x00]
|
||||
vpaddusw 4660(%rax,%r14,8), %zmm23, %zmm27
|
||||
|
||||
// CHECK: vpaddusw 8128(%rdx), %zmm23, %zmm27
|
||||
// CHECK: encoding: [0x62,0x61,0x45,0x40,0xdd,0x5a,0x7f]
|
||||
vpaddusw 8128(%rdx), %zmm23, %zmm27
|
||||
|
||||
// CHECK: vpaddusw 8192(%rdx), %zmm23, %zmm27
|
||||
// CHECK: encoding: [0x62,0x61,0x45,0x40,0xdd,0x9a,0x00,0x20,0x00,0x00]
|
||||
vpaddusw 8192(%rdx), %zmm23, %zmm27
|
||||
|
||||
// CHECK: vpaddusw -8192(%rdx), %zmm23, %zmm27
|
||||
// CHECK: encoding: [0x62,0x61,0x45,0x40,0xdd,0x5a,0x80]
|
||||
vpaddusw -8192(%rdx), %zmm23, %zmm27
|
||||
|
||||
// CHECK: vpaddusw -8256(%rdx), %zmm23, %zmm27
|
||||
// CHECK: encoding: [0x62,0x61,0x45,0x40,0xdd,0x9a,0xc0,0xdf,0xff,0xff]
|
||||
vpaddusw -8256(%rdx), %zmm23, %zmm27
|
||||
|
||||
// CHECK: vpsubsb %zmm18, %zmm28, %zmm17
|
||||
// CHECK: encoding: [0x62,0xa1,0x1d,0x40,0xe8,0xca]
|
||||
vpsubsb %zmm18, %zmm28, %zmm17
|
||||
|
||||
// CHECK: vpsubsb %zmm18, %zmm28, %zmm17 {%k5}
|
||||
// CHECK: encoding: [0x62,0xa1,0x1d,0x45,0xe8,0xca]
|
||||
vpsubsb %zmm18, %zmm28, %zmm17 {%k5}
|
||||
|
||||
// CHECK: vpsubsb %zmm18, %zmm28, %zmm17 {%k5} {z}
|
||||
// CHECK: encoding: [0x62,0xa1,0x1d,0xc5,0xe8,0xca]
|
||||
vpsubsb %zmm18, %zmm28, %zmm17 {%k5} {z}
|
||||
|
||||
// CHECK: vpsubsb (%rcx), %zmm28, %zmm17
|
||||
// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xe8,0x09]
|
||||
vpsubsb (%rcx), %zmm28, %zmm17
|
||||
|
||||
// CHECK: vpsubsb 4660(%rax,%r14,8), %zmm28, %zmm17
|
||||
// CHECK: encoding: [0x62,0xa1,0x1d,0x40,0xe8,0x8c,0xf0,0x34,0x12,0x00,0x00]
|
||||
vpsubsb 4660(%rax,%r14,8), %zmm28, %zmm17
|
||||
|
||||
// CHECK: vpsubsb 8128(%rdx), %zmm28, %zmm17
|
||||
// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xe8,0x4a,0x7f]
|
||||
vpsubsb 8128(%rdx), %zmm28, %zmm17
|
||||
|
||||
// CHECK: vpsubsb 8192(%rdx), %zmm28, %zmm17
|
||||
// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xe8,0x8a,0x00,0x20,0x00,0x00]
|
||||
vpsubsb 8192(%rdx), %zmm28, %zmm17
|
||||
|
||||
// CHECK: vpsubsb -8192(%rdx), %zmm28, %zmm17
|
||||
// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xe8,0x4a,0x80]
|
||||
vpsubsb -8192(%rdx), %zmm28, %zmm17
|
||||
|
||||
// CHECK: vpsubsb -8256(%rdx), %zmm28, %zmm17
|
||||
// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xe8,0x8a,0xc0,0xdf,0xff,0xff]
|
||||
vpsubsb -8256(%rdx), %zmm28, %zmm17
|
||||
|
||||
// CHECK: vpsubsw %zmm26, %zmm24, %zmm30
|
||||
// CHECK: encoding: [0x62,0x01,0x3d,0x40,0xe9,0xf2]
|
||||
vpsubsw %zmm26, %zmm24, %zmm30
|
||||
|
||||
// CHECK: vpsubsw %zmm26, %zmm24, %zmm30 {%k3}
|
||||
// CHECK: encoding: [0x62,0x01,0x3d,0x43,0xe9,0xf2]
|
||||
vpsubsw %zmm26, %zmm24, %zmm30 {%k3}
|
||||
|
||||
// CHECK: vpsubsw %zmm26, %zmm24, %zmm30 {%k3} {z}
|
||||
// CHECK: encoding: [0x62,0x01,0x3d,0xc3,0xe9,0xf2]
|
||||
vpsubsw %zmm26, %zmm24, %zmm30 {%k3} {z}
|
||||
|
||||
// CHECK: vpsubsw (%rcx), %zmm24, %zmm30
|
||||
// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe9,0x31]
|
||||
vpsubsw (%rcx), %zmm24, %zmm30
|
||||
|
||||
// CHECK: vpsubsw 4660(%rax,%r14,8), %zmm24, %zmm30
|
||||
// CHECK: encoding: [0x62,0x21,0x3d,0x40,0xe9,0xb4,0xf0,0x34,0x12,0x00,0x00]
|
||||
vpsubsw 4660(%rax,%r14,8), %zmm24, %zmm30
|
||||
|
||||
// CHECK: vpsubsw 8128(%rdx), %zmm24, %zmm30
|
||||
// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe9,0x72,0x7f]
|
||||
vpsubsw 8128(%rdx), %zmm24, %zmm30
|
||||
|
||||
// CHECK: vpsubsw 8192(%rdx), %zmm24, %zmm30
|
||||
// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe9,0xb2,0x00,0x20,0x00,0x00]
|
||||
vpsubsw 8192(%rdx), %zmm24, %zmm30
|
||||
|
||||
// CHECK: vpsubsw -8192(%rdx), %zmm24, %zmm30
|
||||
// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe9,0x72,0x80]
|
||||
vpsubsw -8192(%rdx), %zmm24, %zmm30
|
||||
|
||||
// CHECK: vpsubsw -8256(%rdx), %zmm24, %zmm30
|
||||
// CHECK: encoding: [0x62,0x61,0x3d,0x40,0xe9,0xb2,0xc0,0xdf,0xff,0xff]
|
||||
vpsubsw -8256(%rdx), %zmm24, %zmm30
|
||||
|
||||
// CHECK: vpsubusb %zmm28, %zmm29, %zmm28
|
||||
// CHECK: encoding: [0x62,0x01,0x15,0x40,0xd8,0xe4]
|
||||
vpsubusb %zmm28, %zmm29, %zmm28
|
||||
|
||||
// CHECK: vpsubusb %zmm28, %zmm29, %zmm28 {%k2}
|
||||
// CHECK: encoding: [0x62,0x01,0x15,0x42,0xd8,0xe4]
|
||||
vpsubusb %zmm28, %zmm29, %zmm28 {%k2}
|
||||
|
||||
// CHECK: vpsubusb %zmm28, %zmm29, %zmm28 {%k2} {z}
|
||||
// CHECK: encoding: [0x62,0x01,0x15,0xc2,0xd8,0xe4]
|
||||
vpsubusb %zmm28, %zmm29, %zmm28 {%k2} {z}
|
||||
|
||||
// CHECK: vpsubusb (%rcx), %zmm29, %zmm28
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x21]
|
||||
vpsubusb (%rcx), %zmm29, %zmm28
|
||||
|
||||
// CHECK: vpsubusb 4660(%rax,%r14,8), %zmm29, %zmm28
|
||||
// CHECK: encoding: [0x62,0x21,0x15,0x40,0xd8,0xa4,0xf0,0x34,0x12,0x00,0x00]
|
||||
vpsubusb 4660(%rax,%r14,8), %zmm29, %zmm28
|
||||
|
||||
// CHECK: vpsubusb 8128(%rdx), %zmm29, %zmm28
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x62,0x7f]
|
||||
vpsubusb 8128(%rdx), %zmm29, %zmm28
|
||||
|
||||
// CHECK: vpsubusb 8192(%rdx), %zmm29, %zmm28
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0xa2,0x00,0x20,0x00,0x00]
|
||||
vpsubusb 8192(%rdx), %zmm29, %zmm28
|
||||
|
||||
// CHECK: vpsubusb -8192(%rdx), %zmm29, %zmm28
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x62,0x80]
|
||||
vpsubusb -8192(%rdx), %zmm29, %zmm28
|
||||
|
||||
// CHECK: vpsubusb -8256(%rdx), %zmm29, %zmm28
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0xa2,0xc0,0xdf,0xff,0xff]
|
||||
vpsubusb -8256(%rdx), %zmm29, %zmm28
|
||||
|
||||
// CHECK: vpsubusw %zmm19, %zmm28, %zmm22
|
||||
// CHECK: encoding: [0x62,0xa1,0x1d,0x40,0xd9,0xf3]
|
||||
vpsubusw %zmm19, %zmm28, %zmm22
|
||||
|
||||
// CHECK: vpsubusw %zmm19, %zmm28, %zmm22 {%k5}
|
||||
// CHECK: encoding: [0x62,0xa1,0x1d,0x45,0xd9,0xf3]
|
||||
vpsubusw %zmm19, %zmm28, %zmm22 {%k5}
|
||||
|
||||
// CHECK: vpsubusw %zmm19, %zmm28, %zmm22 {%k5} {z}
|
||||
// CHECK: encoding: [0x62,0xa1,0x1d,0xc5,0xd9,0xf3]
|
||||
vpsubusw %zmm19, %zmm28, %zmm22 {%k5} {z}
|
||||
|
||||
// CHECK: vpsubusw (%rcx), %zmm28, %zmm22
|
||||
// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xd9,0x31]
|
||||
vpsubusw (%rcx), %zmm28, %zmm22
|
||||
|
||||
// CHECK: vpsubusw 4660(%rax,%r14,8), %zmm28, %zmm22
|
||||
// CHECK: encoding: [0x62,0xa1,0x1d,0x40,0xd9,0xb4,0xf0,0x34,0x12,0x00,0x00]
|
||||
vpsubusw 4660(%rax,%r14,8), %zmm28, %zmm22
|
||||
|
||||
// CHECK: vpsubusw 8128(%rdx), %zmm28, %zmm22
|
||||
// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xd9,0x72,0x7f]
|
||||
vpsubusw 8128(%rdx), %zmm28, %zmm22
|
||||
|
||||
// CHECK: vpsubusw 8192(%rdx), %zmm28, %zmm22
|
||||
// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xd9,0xb2,0x00,0x20,0x00,0x00]
|
||||
vpsubusw 8192(%rdx), %zmm28, %zmm22
|
||||
|
||||
// CHECK: vpsubusw -8192(%rdx), %zmm28, %zmm22
|
||||
// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xd9,0x72,0x80]
|
||||
vpsubusw -8192(%rdx), %zmm28, %zmm22
|
||||
|
||||
// CHECK: vpsubusw -8256(%rdx), %zmm28, %zmm22
|
||||
// CHECK: encoding: [0x62,0xe1,0x1d,0x40,0xd9,0xb2,0xc0,0xdf,0xff,0xff]
|
||||
vpsubusw -8256(%rdx), %zmm28, %zmm22
|
||||
|
||||
// CHECK: vpaddsb %zmm25, %zmm19, %zmm28
|
||||
// CHECK: encoding: [0x62,0x01,0x65,0x40,0xec,0xe1]
|
||||
vpaddsb %zmm25, %zmm19, %zmm28
|
||||
|
||||
// CHECK: vpaddsb %zmm25, %zmm19, %zmm28 {%k4}
|
||||
// CHECK: encoding: [0x62,0x01,0x65,0x44,0xec,0xe1]
|
||||
vpaddsb %zmm25, %zmm19, %zmm28 {%k4}
|
||||
|
||||
// CHECK: vpaddsb %zmm25, %zmm19, %zmm28 {%k4} {z}
|
||||
// CHECK: encoding: [0x62,0x01,0x65,0xc4,0xec,0xe1]
|
||||
vpaddsb %zmm25, %zmm19, %zmm28 {%k4} {z}
|
||||
|
||||
// CHECK: vpaddsb (%rcx), %zmm19, %zmm28
|
||||
// CHECK: encoding: [0x62,0x61,0x65,0x40,0xec,0x21]
|
||||
vpaddsb (%rcx), %zmm19, %zmm28
|
||||
|
||||
// CHECK: vpaddsb 291(%rax,%r14,8), %zmm19, %zmm28
|
||||
// CHECK: encoding: [0x62,0x21,0x65,0x40,0xec,0xa4,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpaddsb 291(%rax,%r14,8), %zmm19, %zmm28
|
||||
|
||||
// CHECK: vpaddsb 8128(%rdx), %zmm19, %zmm28
|
||||
// CHECK: encoding: [0x62,0x61,0x65,0x40,0xec,0x62,0x7f]
|
||||
vpaddsb 8128(%rdx), %zmm19, %zmm28
|
||||
|
||||
// CHECK: vpaddsb 8192(%rdx), %zmm19, %zmm28
|
||||
// CHECK: encoding: [0x62,0x61,0x65,0x40,0xec,0xa2,0x00,0x20,0x00,0x00]
|
||||
vpaddsb 8192(%rdx), %zmm19, %zmm28
|
||||
|
||||
// CHECK: vpaddsb -8192(%rdx), %zmm19, %zmm28
|
||||
// CHECK: encoding: [0x62,0x61,0x65,0x40,0xec,0x62,0x80]
|
||||
vpaddsb -8192(%rdx), %zmm19, %zmm28
|
||||
|
||||
// CHECK: vpaddsb -8256(%rdx), %zmm19, %zmm28
|
||||
// CHECK: encoding: [0x62,0x61,0x65,0x40,0xec,0xa2,0xc0,0xdf,0xff,0xff]
|
||||
vpaddsb -8256(%rdx), %zmm19, %zmm28
|
||||
|
||||
// CHECK: vpaddsw %zmm20, %zmm22, %zmm20
|
||||
// CHECK: encoding: [0x62,0xa1,0x4d,0x40,0xed,0xe4]
|
||||
vpaddsw %zmm20, %zmm22, %zmm20
|
||||
|
||||
// CHECK: vpaddsw %zmm20, %zmm22, %zmm20 {%k7}
|
||||
// CHECK: encoding: [0x62,0xa1,0x4d,0x47,0xed,0xe4]
|
||||
vpaddsw %zmm20, %zmm22, %zmm20 {%k7}
|
||||
|
||||
// CHECK: vpaddsw %zmm20, %zmm22, %zmm20 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xa1,0x4d,0xc7,0xed,0xe4]
|
||||
vpaddsw %zmm20, %zmm22, %zmm20 {%k7} {z}
|
||||
|
||||
// CHECK: vpaddsw (%rcx), %zmm22, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xed,0x21]
|
||||
vpaddsw (%rcx), %zmm22, %zmm20
|
||||
|
||||
// CHECK: vpaddsw 291(%rax,%r14,8), %zmm22, %zmm20
|
||||
// CHECK: encoding: [0x62,0xa1,0x4d,0x40,0xed,0xa4,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpaddsw 291(%rax,%r14,8), %zmm22, %zmm20
|
||||
|
||||
// CHECK: vpaddsw 8128(%rdx), %zmm22, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xed,0x62,0x7f]
|
||||
vpaddsw 8128(%rdx), %zmm22, %zmm20
|
||||
|
||||
// CHECK: vpaddsw 8192(%rdx), %zmm22, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xed,0xa2,0x00,0x20,0x00,0x00]
|
||||
vpaddsw 8192(%rdx), %zmm22, %zmm20
|
||||
|
||||
// CHECK: vpaddsw -8192(%rdx), %zmm22, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xed,0x62,0x80]
|
||||
vpaddsw -8192(%rdx), %zmm22, %zmm20
|
||||
|
||||
// CHECK: vpaddsw -8256(%rdx), %zmm22, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xed,0xa2,0xc0,0xdf,0xff,0xff]
|
||||
vpaddsw -8256(%rdx), %zmm22, %zmm20
|
||||
|
||||
// CHECK: vpaddusb %zmm17, %zmm27, %zmm26
|
||||
// CHECK: encoding: [0x62,0x21,0x25,0x40,0xdc,0xd1]
|
||||
vpaddusb %zmm17, %zmm27, %zmm26
|
||||
|
||||
// CHECK: vpaddusb %zmm17, %zmm27, %zmm26 {%k3}
|
||||
// CHECK: encoding: [0x62,0x21,0x25,0x43,0xdc,0xd1]
|
||||
vpaddusb %zmm17, %zmm27, %zmm26 {%k3}
|
||||
|
||||
// CHECK: vpaddusb %zmm17, %zmm27, %zmm26 {%k3} {z}
|
||||
// CHECK: encoding: [0x62,0x21,0x25,0xc3,0xdc,0xd1]
|
||||
vpaddusb %zmm17, %zmm27, %zmm26 {%k3} {z}
|
||||
|
||||
// CHECK: vpaddusb (%rcx), %zmm27, %zmm26
|
||||
// CHECK: encoding: [0x62,0x61,0x25,0x40,0xdc,0x11]
|
||||
vpaddusb (%rcx), %zmm27, %zmm26
|
||||
|
||||
// CHECK: vpaddusb 291(%rax,%r14,8), %zmm27, %zmm26
|
||||
// CHECK: encoding: [0x62,0x21,0x25,0x40,0xdc,0x94,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpaddusb 291(%rax,%r14,8), %zmm27, %zmm26
|
||||
|
||||
// CHECK: vpaddusb 8128(%rdx), %zmm27, %zmm26
|
||||
// CHECK: encoding: [0x62,0x61,0x25,0x40,0xdc,0x52,0x7f]
|
||||
vpaddusb 8128(%rdx), %zmm27, %zmm26
|
||||
|
||||
// CHECK: vpaddusb 8192(%rdx), %zmm27, %zmm26
|
||||
// CHECK: encoding: [0x62,0x61,0x25,0x40,0xdc,0x92,0x00,0x20,0x00,0x00]
|
||||
vpaddusb 8192(%rdx), %zmm27, %zmm26
|
||||
|
||||
// CHECK: vpaddusb -8192(%rdx), %zmm27, %zmm26
|
||||
// CHECK: encoding: [0x62,0x61,0x25,0x40,0xdc,0x52,0x80]
|
||||
vpaddusb -8192(%rdx), %zmm27, %zmm26
|
||||
|
||||
// CHECK: vpaddusb -8256(%rdx), %zmm27, %zmm26
|
||||
// CHECK: encoding: [0x62,0x61,0x25,0x40,0xdc,0x92,0xc0,0xdf,0xff,0xff]
|
||||
vpaddusb -8256(%rdx), %zmm27, %zmm26
|
||||
|
||||
// CHECK: vpaddusw %zmm20, %zmm22, %zmm21
|
||||
// CHECK: encoding: [0x62,0xa1,0x4d,0x40,0xdd,0xec]
|
||||
vpaddusw %zmm20, %zmm22, %zmm21
|
||||
|
||||
// CHECK: vpaddusw %zmm20, %zmm22, %zmm21 {%k7}
|
||||
// CHECK: encoding: [0x62,0xa1,0x4d,0x47,0xdd,0xec]
|
||||
vpaddusw %zmm20, %zmm22, %zmm21 {%k7}
|
||||
|
||||
// CHECK: vpaddusw %zmm20, %zmm22, %zmm21 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xa1,0x4d,0xc7,0xdd,0xec]
|
||||
vpaddusw %zmm20, %zmm22, %zmm21 {%k7} {z}
|
||||
|
||||
// CHECK: vpaddusw (%rcx), %zmm22, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xdd,0x29]
|
||||
vpaddusw (%rcx), %zmm22, %zmm21
|
||||
|
||||
// CHECK: vpaddusw 291(%rax,%r14,8), %zmm22, %zmm21
|
||||
// CHECK: encoding: [0x62,0xa1,0x4d,0x40,0xdd,0xac,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpaddusw 291(%rax,%r14,8), %zmm22, %zmm21
|
||||
|
||||
// CHECK: vpaddusw 8128(%rdx), %zmm22, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xdd,0x6a,0x7f]
|
||||
vpaddusw 8128(%rdx), %zmm22, %zmm21
|
||||
|
||||
// CHECK: vpaddusw 8192(%rdx), %zmm22, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xdd,0xaa,0x00,0x20,0x00,0x00]
|
||||
vpaddusw 8192(%rdx), %zmm22, %zmm21
|
||||
|
||||
// CHECK: vpaddusw -8192(%rdx), %zmm22, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xdd,0x6a,0x80]
|
||||
vpaddusw -8192(%rdx), %zmm22, %zmm21
|
||||
|
||||
// CHECK: vpaddusw -8256(%rdx), %zmm22, %zmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x4d,0x40,0xdd,0xaa,0xc0,0xdf,0xff,0xff]
|
||||
vpaddusw -8256(%rdx), %zmm22, %zmm21
|
||||
|
||||
// CHECK: vpsubsb %zmm28, %zmm21, %zmm19
|
||||
// CHECK: encoding: [0x62,0x81,0x55,0x40,0xe8,0xdc]
|
||||
vpsubsb %zmm28, %zmm21, %zmm19
|
||||
|
||||
// CHECK: vpsubsb %zmm28, %zmm21, %zmm19 {%k2}
|
||||
// CHECK: encoding: [0x62,0x81,0x55,0x42,0xe8,0xdc]
|
||||
vpsubsb %zmm28, %zmm21, %zmm19 {%k2}
|
||||
|
||||
// CHECK: vpsubsb %zmm28, %zmm21, %zmm19 {%k2} {z}
|
||||
// CHECK: encoding: [0x62,0x81,0x55,0xc2,0xe8,0xdc]
|
||||
vpsubsb %zmm28, %zmm21, %zmm19 {%k2} {z}
|
||||
|
||||
// CHECK: vpsubsb (%rcx), %zmm21, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x55,0x40,0xe8,0x19]
|
||||
vpsubsb (%rcx), %zmm21, %zmm19
|
||||
|
||||
// CHECK: vpsubsb 291(%rax,%r14,8), %zmm21, %zmm19
|
||||
// CHECK: encoding: [0x62,0xa1,0x55,0x40,0xe8,0x9c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpsubsb 291(%rax,%r14,8), %zmm21, %zmm19
|
||||
|
||||
// CHECK: vpsubsb 8128(%rdx), %zmm21, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x55,0x40,0xe8,0x5a,0x7f]
|
||||
vpsubsb 8128(%rdx), %zmm21, %zmm19
|
||||
|
||||
// CHECK: vpsubsb 8192(%rdx), %zmm21, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x55,0x40,0xe8,0x9a,0x00,0x20,0x00,0x00]
|
||||
vpsubsb 8192(%rdx), %zmm21, %zmm19
|
||||
|
||||
// CHECK: vpsubsb -8192(%rdx), %zmm21, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x55,0x40,0xe8,0x5a,0x80]
|
||||
vpsubsb -8192(%rdx), %zmm21, %zmm19
|
||||
|
||||
// CHECK: vpsubsb -8256(%rdx), %zmm21, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x55,0x40,0xe8,0x9a,0xc0,0xdf,0xff,0xff]
|
||||
vpsubsb -8256(%rdx), %zmm21, %zmm19
|
||||
|
||||
// CHECK: vpsubsw %zmm23, %zmm23, %zmm23
|
||||
// CHECK: encoding: [0x62,0xa1,0x45,0x40,0xe9,0xff]
|
||||
vpsubsw %zmm23, %zmm23, %zmm23
|
||||
|
||||
// CHECK: vpsubsw %zmm23, %zmm23, %zmm23 {%k6}
|
||||
// CHECK: encoding: [0x62,0xa1,0x45,0x46,0xe9,0xff]
|
||||
vpsubsw %zmm23, %zmm23, %zmm23 {%k6}
|
||||
|
||||
// CHECK: vpsubsw %zmm23, %zmm23, %zmm23 {%k6} {z}
|
||||
// CHECK: encoding: [0x62,0xa1,0x45,0xc6,0xe9,0xff]
|
||||
vpsubsw %zmm23, %zmm23, %zmm23 {%k6} {z}
|
||||
|
||||
// CHECK: vpsubsw (%rcx), %zmm23, %zmm23
|
||||
// CHECK: encoding: [0x62,0xe1,0x45,0x40,0xe9,0x39]
|
||||
vpsubsw (%rcx), %zmm23, %zmm23
|
||||
|
||||
// CHECK: vpsubsw 291(%rax,%r14,8), %zmm23, %zmm23
|
||||
// CHECK: encoding: [0x62,0xa1,0x45,0x40,0xe9,0xbc,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpsubsw 291(%rax,%r14,8), %zmm23, %zmm23
|
||||
|
||||
// CHECK: vpsubsw 8128(%rdx), %zmm23, %zmm23
|
||||
// CHECK: encoding: [0x62,0xe1,0x45,0x40,0xe9,0x7a,0x7f]
|
||||
vpsubsw 8128(%rdx), %zmm23, %zmm23
|
||||
|
||||
// CHECK: vpsubsw 8192(%rdx), %zmm23, %zmm23
|
||||
// CHECK: encoding: [0x62,0xe1,0x45,0x40,0xe9,0xba,0x00,0x20,0x00,0x00]
|
||||
vpsubsw 8192(%rdx), %zmm23, %zmm23
|
||||
|
||||
// CHECK: vpsubsw -8192(%rdx), %zmm23, %zmm23
|
||||
// CHECK: encoding: [0x62,0xe1,0x45,0x40,0xe9,0x7a,0x80]
|
||||
vpsubsw -8192(%rdx), %zmm23, %zmm23
|
||||
|
||||
// CHECK: vpsubsw -8256(%rdx), %zmm23, %zmm23
|
||||
// CHECK: encoding: [0x62,0xe1,0x45,0x40,0xe9,0xba,0xc0,0xdf,0xff,0xff]
|
||||
vpsubsw -8256(%rdx), %zmm23, %zmm23
|
||||
|
||||
// CHECK: vpsubusb %zmm25, %zmm29, %zmm27
|
||||
// CHECK: encoding: [0x62,0x01,0x15,0x40,0xd8,0xd9]
|
||||
vpsubusb %zmm25, %zmm29, %zmm27
|
||||
|
||||
// CHECK: vpsubusb %zmm25, %zmm29, %zmm27 {%k4}
|
||||
// CHECK: encoding: [0x62,0x01,0x15,0x44,0xd8,0xd9]
|
||||
vpsubusb %zmm25, %zmm29, %zmm27 {%k4}
|
||||
|
||||
// CHECK: vpsubusb %zmm25, %zmm29, %zmm27 {%k4} {z}
|
||||
// CHECK: encoding: [0x62,0x01,0x15,0xc4,0xd8,0xd9]
|
||||
vpsubusb %zmm25, %zmm29, %zmm27 {%k4} {z}
|
||||
|
||||
// CHECK: vpsubusb (%rcx), %zmm29, %zmm27
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x19]
|
||||
vpsubusb (%rcx), %zmm29, %zmm27
|
||||
|
||||
// CHECK: vpsubusb 291(%rax,%r14,8), %zmm29, %zmm27
|
||||
// CHECK: encoding: [0x62,0x21,0x15,0x40,0xd8,0x9c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpsubusb 291(%rax,%r14,8), %zmm29, %zmm27
|
||||
|
||||
// CHECK: vpsubusb 8128(%rdx), %zmm29, %zmm27
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x5a,0x7f]
|
||||
vpsubusb 8128(%rdx), %zmm29, %zmm27
|
||||
|
||||
// CHECK: vpsubusb 8192(%rdx), %zmm29, %zmm27
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x9a,0x00,0x20,0x00,0x00]
|
||||
vpsubusb 8192(%rdx), %zmm29, %zmm27
|
||||
|
||||
// CHECK: vpsubusb -8192(%rdx), %zmm29, %zmm27
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x5a,0x80]
|
||||
vpsubusb -8192(%rdx), %zmm29, %zmm27
|
||||
|
||||
// CHECK: vpsubusb -8256(%rdx), %zmm29, %zmm27
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xd8,0x9a,0xc0,0xdf,0xff,0xff]
|
||||
vpsubusb -8256(%rdx), %zmm29, %zmm27
|
||||
|
||||
// CHECK: vpsubusw %zmm25, %zmm20, %zmm20
|
||||
// CHECK: encoding: [0x62,0x81,0x5d,0x40,0xd9,0xe1]
|
||||
vpsubusw %zmm25, %zmm20, %zmm20
|
||||
|
||||
// CHECK: vpsubusw %zmm25, %zmm20, %zmm20 {%k6}
|
||||
// CHECK: encoding: [0x62,0x81,0x5d,0x46,0xd9,0xe1]
|
||||
vpsubusw %zmm25, %zmm20, %zmm20 {%k6}
|
||||
|
||||
// CHECK: vpsubusw %zmm25, %zmm20, %zmm20 {%k6} {z}
|
||||
// CHECK: encoding: [0x62,0x81,0x5d,0xc6,0xd9,0xe1]
|
||||
vpsubusw %zmm25, %zmm20, %zmm20 {%k6} {z}
|
||||
|
||||
// CHECK: vpsubusw (%rcx), %zmm20, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x5d,0x40,0xd9,0x21]
|
||||
vpsubusw (%rcx), %zmm20, %zmm20
|
||||
|
||||
// CHECK: vpsubusw 291(%rax,%r14,8), %zmm20, %zmm20
|
||||
// CHECK: encoding: [0x62,0xa1,0x5d,0x40,0xd9,0xa4,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpsubusw 291(%rax,%r14,8), %zmm20, %zmm20
|
||||
|
||||
// CHECK: vpsubusw 8128(%rdx), %zmm20, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x5d,0x40,0xd9,0x62,0x7f]
|
||||
vpsubusw 8128(%rdx), %zmm20, %zmm20
|
||||
|
||||
// CHECK: vpsubusw 8192(%rdx), %zmm20, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x5d,0x40,0xd9,0xa2,0x00,0x20,0x00,0x00]
|
||||
vpsubusw 8192(%rdx), %zmm20, %zmm20
|
||||
|
||||
// CHECK: vpsubusw -8192(%rdx), %zmm20, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x5d,0x40,0xd9,0x62,0x80]
|
||||
vpsubusw -8192(%rdx), %zmm20, %zmm20
|
||||
|
||||
// CHECK: vpsubusw -8256(%rdx), %zmm20, %zmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x5d,0x40,0xd9,0xa2,0xc0,0xdf,0xff,0xff]
|
||||
vpsubusw -8256(%rdx), %zmm20, %zmm20
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user