AVX-512: Added logical and arithmetic instructions for SKX (Skylake server)

by Asaf Badouh (asaf.badouh@intel.com)



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235375 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2015-04-21 10:27:40 +00:00
parent 01eaaa72bf
commit a1fa0de258
10 changed files with 4893 additions and 83 deletions

View File

@ -3089,21 +3089,57 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
// Bitwise ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_pand_d_128 : GCCBuiltin<"__builtin_ia32_pandd128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pand_d_256 : GCCBuiltin<"__builtin_ia32_pandd256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pand_d_512 : GCCBuiltin<"__builtin_ia32_pandd512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pand_q_128 : GCCBuiltin<"__builtin_ia32_pandq128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pand_q_256 : GCCBuiltin<"__builtin_ia32_pandq256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pand_q_512 : GCCBuiltin<"__builtin_ia32_pandq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_por_d_128 : GCCBuiltin<"__builtin_ia32_pord128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_por_d_256 : GCCBuiltin<"__builtin_ia32_pord256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_por_d_512 : GCCBuiltin<"__builtin_ia32_pord512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_por_q_128 : GCCBuiltin<"__builtin_ia32_porq128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_por_q_256 : GCCBuiltin<"__builtin_ia32_porq256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_por_q_512 : GCCBuiltin<"__builtin_ia32_porq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pxor_d_128 : GCCBuiltin<"__builtin_ia32_pxord128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pxor_d_256 : GCCBuiltin<"__builtin_ia32_pxord256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pxor_d_512 : GCCBuiltin<"__builtin_ia32_pxord512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pxor_q_128 : GCCBuiltin<"__builtin_ia32_pxorq128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pxor_q_256 : GCCBuiltin<"__builtin_ia32_pxorq256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pxor_q_512 : GCCBuiltin<"__builtin_ia32_pxorq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
@ -3235,28 +3271,203 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
}
// FP logical ops
// Masked AVX-512 floating-point bitwise intrinsics (and / andn / or / xor)
// for 128-, 256- and 512-bit vectors of f64 (pd) and f32 (ps).
//
// Every intrinsic here returns one vector and takes three vectors of that
// same type followed by an integer mask operand; all are marked IntrNoMem.
// The mask operand is i8 for vectors of up to 8 elements and i16 for the
// 16-element v16f32 forms.
// NOTE(review): the operand order is presumably (src1, src2, passthru, mask),
// as for other masked AVX-512 intrinsics -- confirm against the lowering code.
let TargetPrefix = "x86" in {
// Bitwise AND.
def int_x86_avx512_mask_and_pd_128 : GCCBuiltin<"__builtin_ia32_andpd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_and_pd_256 : GCCBuiltin<"__builtin_ia32_andpd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_and_pd_512 : GCCBuiltin<"__builtin_ia32_andpd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_and_ps_128 : GCCBuiltin<"__builtin_ia32_andps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_and_ps_256 : GCCBuiltin<"__builtin_ia32_andps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_and_ps_512 : GCCBuiltin<"__builtin_ia32_andps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
// Bitwise AND-NOT (andn).
def int_x86_avx512_mask_andn_pd_128 : GCCBuiltin<"__builtin_ia32_andnpd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_andn_pd_256 : GCCBuiltin<"__builtin_ia32_andnpd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_andn_pd_512 : GCCBuiltin<"__builtin_ia32_andnpd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_andn_ps_128 : GCCBuiltin<"__builtin_ia32_andnps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_andn_ps_256 : GCCBuiltin<"__builtin_ia32_andnps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_andn_ps_512 : GCCBuiltin<"__builtin_ia32_andnps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
// Bitwise OR.
def int_x86_avx512_mask_or_pd_128 : GCCBuiltin<"__builtin_ia32_orpd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_or_pd_256 : GCCBuiltin<"__builtin_ia32_orpd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_or_pd_512 : GCCBuiltin<"__builtin_ia32_orpd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_or_ps_128 : GCCBuiltin<"__builtin_ia32_orps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_or_ps_256 : GCCBuiltin<"__builtin_ia32_orps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_or_ps_512 : GCCBuiltin<"__builtin_ia32_orps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
// Bitwise XOR.
def int_x86_avx512_mask_xor_pd_128 : GCCBuiltin<"__builtin_ia32_xorpd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_xor_pd_256 : GCCBuiltin<"__builtin_ia32_xorpd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_xor_pd_512 : GCCBuiltin<"__builtin_ia32_xorpd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_xor_ps_128 : GCCBuiltin<"__builtin_ia32_xorps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_xor_ps_256 : GCCBuiltin<"__builtin_ia32_xorps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_xor_ps_512 : GCCBuiltin<"__builtin_ia32_xorps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
}
// Integer arithmetic ops
// Masked AVX-512 integer arithmetic intrinsics (add, subtract, multiply) for
// 128-, 256- and 512-bit vectors.
//
// Every intrinsic here returns one vector and takes two source vectors, a
// third vector of the result type, and an integer mask operand; all are
// marked IntrNoMem. The mask operand type follows the element count of the
// result: i8 for up to 8 elements, i16 for 16, i32 for 32 and i64 for 64.
// NOTE(review): the third vector operand is presumably the pass-through value
// used for masked-off lanes -- confirm against the lowering code.
let TargetPrefix = "x86" in {
// Add: byte (b), word (w), doubleword (d) and quadword (q) elements.
def int_x86_avx512_mask_padd_b_128 : GCCBuiltin<"__builtin_ia32_paddb128_mask">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padd_b_256 : GCCBuiltin<"__builtin_ia32_paddb256_mask">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padd_b_512 : GCCBuiltin<"__builtin_ia32_paddb512_mask">,
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padd_w_128 : GCCBuiltin<"__builtin_ia32_paddw128_mask">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padd_w_256 : GCCBuiltin<"__builtin_ia32_paddw256_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padd_w_512 : GCCBuiltin<"__builtin_ia32_paddw512_mask">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padd_d_128 : GCCBuiltin<"__builtin_ia32_paddd128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padd_d_256 : GCCBuiltin<"__builtin_ia32_paddd256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padd_d_512 : GCCBuiltin<"__builtin_ia32_paddd512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padd_q_128 : GCCBuiltin<"__builtin_ia32_paddq128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padd_q_256 : GCCBuiltin<"__builtin_ia32_paddq256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padd_q_512 : GCCBuiltin<"__builtin_ia32_paddq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
// Subtract: same element sizes and mask types as the add group.
def int_x86_avx512_mask_psub_b_128 : GCCBuiltin<"__builtin_ia32_psubb128_mask">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psub_b_256 : GCCBuiltin<"__builtin_ia32_psubb256_mask">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psub_b_512 : GCCBuiltin<"__builtin_ia32_psubb512_mask">,
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psub_w_128 : GCCBuiltin<"__builtin_ia32_psubw128_mask">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psub_w_256 : GCCBuiltin<"__builtin_ia32_psubw256_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psub_w_512 : GCCBuiltin<"__builtin_ia32_psubw512_mask">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psub_d_128 : GCCBuiltin<"__builtin_ia32_psubd128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psub_d_256 : GCCBuiltin<"__builtin_ia32_psubd256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psub_d_512 : GCCBuiltin<"__builtin_ia32_psubd512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psub_q_128 : GCCBuiltin<"__builtin_ia32_psubq128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psub_q_256 : GCCBuiltin<"__builtin_ia32_psubq256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psub_q_512 : GCCBuiltin<"__builtin_ia32_psubq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
// pmul_dq / pmulu_dq: the two source vectors have i32 elements, while the
// result and the third vector operand have i64 elements (half as many lanes),
// so the mask type follows the i64 lane count.
def int_x86_avx512_mask_pmulu_dq_128 : GCCBuiltin<"__builtin_ia32_pmuludq128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmul_dq_128 : GCCBuiltin<"__builtin_ia32_pmuldq128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmulu_dq_256 : GCCBuiltin<"__builtin_ia32_pmuludq256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmul_dq_256 : GCCBuiltin<"__builtin_ia32_pmuldq256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmulu_dq_512 : GCCBuiltin<"__builtin_ia32_pmuludq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmul_dq_512 : GCCBuiltin<"__builtin_ia32_pmuldq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
// pmull: multiply where sources and result share one element type
// (word, doubleword or quadword).
def int_x86_avx512_mask_pmull_w_128 : GCCBuiltin<"__builtin_ia32_pmullw128_mask">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmull_w_256 : GCCBuiltin<"__builtin_ia32_pmullw256_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmull_w_512 : GCCBuiltin<"__builtin_ia32_pmullw512_mask">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmull_d_128 : GCCBuiltin<"__builtin_ia32_pmulld128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmull_d_256 : GCCBuiltin<"__builtin_ia32_pmulld256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmull_d_512 : GCCBuiltin<"__builtin_ia32_pmulld512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmull_q_128 : GCCBuiltin<"__builtin_ia32_pmullq128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmull_q_256 : GCCBuiltin<"__builtin_ia32_pmullq256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmull_q_512 : GCCBuiltin<"__builtin_ia32_pmullq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Gather and Scatter ops
let TargetPrefix = "x86" in {
def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gathersiv8df">,

View File

@ -1373,7 +1373,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
}
// When the subtarget reports DQI, mark ISD::MUL as Legal for the vector types
// with 64-bit integer elements (v2i64, v4i64 and v8i64).
// NOTE(review): all three widths are enabled on hasDQI() alone; confirm that
// the 128/256-bit types do not additionally need a VLX check at this point.
if (Subtarget->hasDQI()) {
  for (MVT VT : {MVT::v2i64, MVT::v4i64, MVT::v8i64})
    setOperationAction(ISD::MUL, VT, Legal);
}
// Custom lower several nodes.
for (MVT VT : MVT::vector_valuetypes()) {
unsigned EltSize = VT.getVectorElementType().getSizeInBits();

View File

@ -2996,7 +2996,7 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
OpcodeStr,
"${src2}"##_Dst.BroadcastStr##", $src1",
"$src1, ${src2}"##_Dst.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bc_v16i32
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Dst.VT (X86VBroadcast
(_Dst.ScalarLdFrag addr:$src2)))))),
"", itins.rm>,
@ -3015,13 +3015,27 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
X86pmuldq, v16i32_info, v8i64_info, 1>,
T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
// Instantiates avx512_binop_rm2 for one opcode at several vector widths, with
// 32-bit-element source operands and 64-bit-element destinations:
//   NAME#Z    - v16i32 sources, v8i64 result (EVEX_V512)
//   NAME#Z256 - v8i32 sources,  v4i64 result (EVEX_V256, requires HasVLX)
//   NAME#Z128 - v4i32 sources,  v2i64 result (EVEX_V128, requires HasVLX)
// opc, OpcodeStr, itins, OpNode and IsCommutable are forwarded unchanged to
// each avx512_binop_rm2 instantiation.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
SDNode OpNode, bit IsCommutable = 0> {
// NOTE(review): this defm uses none of the multiclass parameters and
// hard-codes opcode 0xF4 / "vpmuludq". It looks like pre-change text from the
// diff this excerpt was taken from (diff markers are not visible) -- confirm
// whether it belongs inside this multiclass.
defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
X86pmuludq, v16i32_info, v8i64_info, 1>,
EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
// 512-bit form.
defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
v16i32_info, v8i64_info, IsCommutable>,
EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
// 256-bit and 128-bit forms are only defined when HasVLX holds.
let Predicates = [HasVLX] in {
defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
v8i32x_info, v4i64x_info, IsCommutable>,
EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
v4i32x_info, v2i64x_info, IsCommutable>,
EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
}
}
// Instantiate vpmuldq (opcode 0x28, T8PD prefix) and vpmuludq (opcode 0xF4)
// through avx512_binop_all; both pass 1 for IsCommutable.
defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
X86pmuldq, 1>,T8PD;
defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
X86pmuludq, 1>;
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
@ -3340,7 +3354,12 @@ defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>;
// Packed floating-point bitwise logic instructions (opcodes 0x54-0x57),
// defined only under the HasDQI predicate.
// NOTE(review): the last template argument (1 for and/or/xor, 0 for andn) is
// presumably the commutable flag of avx512_fp_binop_p -- confirm against that
// multiclass, which is not visible here.
let Predicates = [HasDQI] in {
defm VAND : avx512_fp_binop_p<0x54, "vand", X86fand, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", X86fandn, 0>;
defm VOR : avx512_fp_binop_p<0x56, "vor", X86for, 1>;
defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>;
}
def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
(v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
(i16 -1), FROUND_CURRENT)),

View File

@ -171,81 +171,93 @@ static const IntrinsicData* getIntrinsicWithChain(unsigned IntNo) {
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),
X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, X86ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, X86ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, X86ISD::UMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, X86ISD::UMAX, 0),
X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, X86ISD::SMIN, 0),
X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, X86ISD::SMIN, 0),
X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0),
X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, X86ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, X86ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxdq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxwd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxwq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxbd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxbq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxbw, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxdq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
X86_INTRINSIC_DATA(avx2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx2_psign_b, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
X86_INTRINSIC_DATA(avx2_psign_d, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
X86_INTRINSIC_DATA(avx2_psign_w, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
X86_INTRINSIC_DATA(avx2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx2_psrai_w, VSHIFT, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx2_psrav_d, INTR_TYPE_2OP, ISD::SRA, 0),
X86_INTRINSIC_DATA(avx2_psrav_d_256, INTR_TYPE_2OP, ISD::SRA, 0),
X86_INTRINSIC_DATA(avx2_psrl_d, INTR_TYPE_2OP, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx2_psrl_q, INTR_TYPE_2OP, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),
X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, X86ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, X86ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, X86ISD::UMAX, 0),
X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, X86ISD::UMAX, 0),
X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, X86ISD::SMIN, 0),
X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, X86ISD::SMIN, 0),
X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0),
X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, X86ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, X86ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxdq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxwd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxwq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxbd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxbq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxbw, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxdq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
X86_INTRINSIC_DATA(avx2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx2_psign_b, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
X86_INTRINSIC_DATA(avx2_psign_d, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
X86_INTRINSIC_DATA(avx2_psign_w, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
X86_INTRINSIC_DATA(avx2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx2_psrai_w, VSHIFT, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx2_psrav_d, INTR_TYPE_2OP, ISD::SRA, 0),
X86_INTRINSIC_DATA(avx2_psrav_d_256, INTR_TYPE_2OP, ISD::SRA, 0),
X86_INTRINSIC_DATA(avx2_psrl_d, INTR_TYPE_2OP, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx2_psrl_q, INTR_TYPE_2OP, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
X86ISD::FADD_RND),
X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD,
X86ISD::FADD_RND),
X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_mask_and_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
X86_INTRINSIC_DATA(avx512_mask_and_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
X86_INTRINSIC_DATA(avx512_mask_and_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
X86_INTRINSIC_DATA(avx512_mask_and_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
X86_INTRINSIC_DATA(avx512_mask_and_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
X86_INTRINSIC_DATA(avx512_mask_and_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
X86_INTRINSIC_DATA(avx512_mask_andn_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
X86_INTRINSIC_DATA(avx512_mask_andn_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
X86_INTRINSIC_DATA(avx512_mask_andn_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
X86_INTRINSIC_DATA(avx512_mask_andn_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
X86_INTRINSIC_DATA(avx512_mask_andn_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
X86_INTRINSIC_DATA(avx512_mask_andn_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_b_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_b_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_b_512, BLEND, X86ISD::SELECT, 0),
@ -334,9 +346,29 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512_mask_or_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
X86_INTRINSIC_DATA(avx512_mask_or_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
X86_INTRINSIC_DATA(avx512_mask_or_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
X86_INTRINSIC_DATA(avx512_mask_or_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
X86_INTRINSIC_DATA(avx512_mask_or_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
X86_INTRINSIC_DATA(avx512_mask_or_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_b_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_b_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_b_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_d_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_d_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_d_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_q_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_q_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_q_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_w_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_w_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padd_w_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_d_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_d_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_q_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_q_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_q_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
@ -362,11 +394,32 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_d_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_d_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_d_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_q_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_q_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_q_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_w_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_w_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_w_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_128, INTR_TYPE_2OP_MASK,
X86ISD::PMULUDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_256, INTR_TYPE_2OP_MASK,
X86ISD::PMULUDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_512, INTR_TYPE_2OP_MASK,
X86ISD::PMULUDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_por_d_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_d_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_d_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
@ -386,9 +439,23 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_b_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_b_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_b_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_d_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_d_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_d_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_q_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_q_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_q_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_w_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_w_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_psub_w_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_d_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_d_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_q_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_q_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::RNDSCALE, 0),
@ -410,6 +477,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),

View File

@ -2163,3 +2163,80 @@ define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask
}
declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; Masked 32-bit multiply, 512-bit: @llvm.x86.avx512.mask.pmull.d.512
; (expected instruction: vpmulld on zmm registers).
; Name suffixes: rr = both sources are arguments, rm = second source is loaded
; from %ptr_b, rmb = second source is a scalar load splatted to every lane.
; A trailing k passes %passThru and %mask, kz passes zeroinitializer and %mask;
; with neither, the call passes zeroinitializer and an all-ones mask (i16 -1).
define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
;CHECK-LABEL: test_mask_mullo_epi32_rr_512
;CHECK: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rrk_512
;CHECK: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rrkz_512
;CHECK: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
; Memory-operand forms: %b is a full-vector load, and the expected assembly
; uses (%rdi) directly as the first instruction operand.
define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
;CHECK-LABEL: test_mask_mullo_epi32_rm_512
;CHECK: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rmk_512
;CHECK: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rmkz_512
;CHECK: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
; Broadcast forms: the insertelement + shufflevector (all-zero shuffle mask)
; pair copies the scalar %q into all 16 lanes; the expected assembly shows the
; load as a (%rdi){1to16} operand.
define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
;CHECK-LABEL: test_mask_mullo_epi32_rmb_512
;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rmbk_512
;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512
;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

View File

@ -996,3 +996,426 @@ define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double>
%res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
ret <4 x double> %res
}
; Masked 16-bit add, 128-bit: @llvm.x86.avx512.mask.padd.w.128
; (expected instruction: vpaddw on xmm registers).
; rr takes both sources as arguments; rm loads the second source from %ptr_b.
; Suffix k passes %passThru and %mask, kz passes zeroinitializer and %mask;
; with neither, the call passes zeroinitializer and an all-ones mask (i8 -1).
define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
;CHECK-LABEL: test_mask_add_epi16_rr_128
;CHECK: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1]
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
ret <8 x i16> %res
}
define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi16_rrk_128
;CHECK: vpaddw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
ret <8 x i16> %res
}
define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi16_rrkz_128
;CHECK: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
; Memory-operand forms: %b is loaded from %ptr_b, and the expected assembly
; uses (%rdi) directly as the first instruction operand.
define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi16_rm_128
;CHECK: vpaddw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0x07]
%b = load <8 x i16>, <8 x i16>* %ptr_b
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
ret <8 x i16> %res
}
define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi16_rmk_128
;CHECK: vpaddw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f]
%b = load <8 x i16>, <8 x i16>* %ptr_b
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
ret <8 x i16> %res
}
define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_add_epi16_rmkz_128
;CHECK: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07]
%b = load <8 x i16>, <8 x i16>* %ptr_b
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
; Masked 16-bit add, 256-bit: @llvm.x86.avx512.mask.padd.w.256
; (expected instruction: vpaddw on ymm registers).
; rr takes both sources as arguments; rm loads the second source from %ptr_b.
; Suffix k passes %passThru and %mask, kz passes zeroinitializer and %mask;
; with neither, the call passes zeroinitializer and an all-ones mask (i16 -1).
define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
;CHECK-LABEL: test_mask_add_epi16_rr_256
;CHECK: vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1]
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
ret <16 x i16> %res
}
define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi16_rrk_256
;CHECK: vpaddw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
ret <16 x i16> %res
}
define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi16_rrkz_256
;CHECK: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
ret <16 x i16> %res
}
; Memory-operand forms: %b is loaded from %ptr_b, and the expected assembly
; uses (%rdi) directly as the first instruction operand.
define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi16_rm_256
;CHECK: vpaddw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0x07]
%b = load <16 x i16>, <16 x i16>* %ptr_b
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
ret <16 x i16> %res
}
define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi16_rmk_256
;CHECK: vpaddw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
%b = load <16 x i16>, <16 x i16>* %ptr_b
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
ret <16 x i16> %res
}
define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_add_epi16_rmkz_256
;CHECK: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
%b = load <16 x i16>, <16 x i16>* %ptr_b
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
; Masked 16-bit subtract, 128-bit: @llvm.x86.avx512.mask.psub.w.128
; (expected instruction: vpsubw on xmm registers).
; rr takes both sources as arguments; rm loads the second source from %ptr_b.
; Suffix k passes %passThru and %mask, kz passes zeroinitializer and %mask;
; with neither, the call passes zeroinitializer and an all-ones mask (i8 -1).
define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
;CHECK-LABEL: test_mask_sub_epi16_rr_128
;CHECK: vpsubw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0xc1]
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
ret <8 x i16> %res
}
define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi16_rrk_128
;CHECK: vpsubw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
ret <8 x i16> %res
}
define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi16_rrkz_128
;CHECK: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
; Memory-operand forms: %b is loaded from %ptr_b, and the expected assembly
; uses (%rdi) directly as the first instruction operand.
define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi16_rm_128
;CHECK: vpsubw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0x07]
%b = load <8 x i16>, <8 x i16>* %ptr_b
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
ret <8 x i16> %res
}
define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi16_rmk_128
;CHECK: vpsubw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
%b = load <8 x i16>, <8 x i16>* %ptr_b
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
ret <8 x i16> %res
}
define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_sub_epi16_rmkz_128
;CHECK: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
%b = load <8 x i16>, <8 x i16>* %ptr_b
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
; Masked 16-bit subtract, 256-bit: @llvm.x86.avx512.mask.psub.w.256
; (expected instruction: vpsubw on ymm registers).
; rr takes both sources as arguments; rm loads the second source from %ptr_b.
; Suffix k passes %passThru and %mask, kz passes zeroinitializer and %mask;
; with neither, the call passes zeroinitializer and an all-ones mask (i16 -1).
define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
;CHECK-LABEL: test_mask_sub_epi16_rr_256
;CHECK: vpsubw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0xc1]
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
ret <16 x i16> %res
}
define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi16_rrk_256
;CHECK: vpsubw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
ret <16 x i16> %res
}
define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi16_rrkz_256
;CHECK: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
ret <16 x i16> %res
}
; Memory-operand forms: %b is loaded from %ptr_b, and the expected assembly
; uses (%rdi) directly as the first instruction operand.
define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi16_rm_256
;CHECK: vpsubw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0x07]
%b = load <16 x i16>, <16 x i16>* %ptr_b
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
ret <16 x i16> %res
}
define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi16_rmk_256
;CHECK: vpsubw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
%b = load <16 x i16>, <16 x i16>* %ptr_b
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
ret <16 x i16> %res
}
define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_sub_epi16_rmkz_256
;CHECK: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
%b = load <16 x i16>, <16 x i16>* %ptr_b
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
; Masked 16-bit add, 512-bit: @llvm.x86.avx512.mask.padd.w.512
; (expected instruction: vpaddw on zmm registers).
; rr takes both sources as arguments; rm loads the second source from %ptr_b.
; Suffix k passes %passThru and %mask, kz passes zeroinitializer and %mask;
; with neither, the call passes zeroinitializer and an all-ones mask (i32 -1).
define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
;CHECK-LABEL: test_mask_add_epi16_rr_512
;CHECK: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_add_epi16_rrk_512
;CHECK: vpaddw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
;CHECK-LABEL: test_mask_add_epi16_rrkz_512
;CHECK: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
; Memory-operand forms: %b is loaded from %ptr_b, and the expected assembly
; uses (%rdi) directly as the first instruction operand.
define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_add_epi16_rm_512
;CHECK: vpaddw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_add_epi16_rmk_512
;CHECK: vpaddw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
;CHECK-LABEL: test_mask_add_epi16_rmkz_512
;CHECK: vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Masked 16-bit subtract, 512-bit: @llvm.x86.avx512.mask.psub.w.512
; (expected instruction: vpsubw on zmm registers).
; rr takes both sources as arguments; rm loads the second source from %ptr_b.
; Suffix k passes %passThru and %mask, kz passes zeroinitializer and %mask;
; with neither, the call passes zeroinitializer and an all-ones mask (i32 -1).
define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
;CHECK-LABEL: test_mask_sub_epi16_rr_512
;CHECK: vpsubw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1]
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_sub_epi16_rrk_512
;CHECK: vpsubw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
;CHECK-LABEL: test_mask_sub_epi16_rrkz_512
;CHECK: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
; Memory-operand forms: %b is loaded from %ptr_b, and the expected assembly
; uses (%rdi) directly as the first instruction operand.
define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_sub_epi16_rm_512
;CHECK: vpsubw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_sub_epi16_rmk_512
;CHECK: vpsubw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
;CHECK-LABEL: test_mask_sub_epi16_rmkz_512
;CHECK: vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Masked 16-bit multiply, 512-bit: @llvm.x86.avx512.mask.pmull.w.512
; (expected instruction: vpmullw on zmm registers).
; rr takes both sources as arguments; rm loads the second source from %ptr_b.
; Suffix k passes %passThru and %mask, kz passes zeroinitializer and %mask;
; with neither, the call passes zeroinitializer and an all-ones mask (i32 -1).
define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
;CHECK-LABEL: test_mask_mullo_epi16_rr_512
;CHECK: vpmullw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1]
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_mullo_epi16_rrk_512
;CHECK: vpmullw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
;CHECK-LABEL: test_mask_mullo_epi16_rrkz_512
;CHECK: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
; Memory-operand forms: %b is loaded from %ptr_b, and the expected assembly
; uses (%rdi) directly as the first instruction operand.
define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_mullo_epi16_rm_512
;CHECK: vpmullw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
;CHECK-LABEL: test_mask_mullo_epi16_rmk_512
;CHECK: vpmullw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
;CHECK-LABEL: test_mask_mullo_epi16_rmkz_512
;CHECK: vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07]
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Masked 16-bit multiply, 128-bit: @llvm.x86.avx512.mask.pmull.w.128
; (expected instruction: vpmullw on xmm registers).
; rr takes both sources as arguments; rm loads the second source from %ptr_b.
; Suffix k passes %passThru and %mask, kz passes zeroinitializer and %mask;
; with neither, the call passes zeroinitializer and an all-ones mask (i8 -1).
define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
;CHECK-LABEL: test_mask_mullo_epi16_rr_128
;CHECK: vpmullw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0xc1]
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
ret <8 x i16> %res
}
define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mullo_epi16_rrk_128
;CHECK: vpmullw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
ret <8 x i16> %res
}
define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
;CHECK-LABEL: test_mask_mullo_epi16_rrkz_128
;CHECK: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
; Memory-operand forms: %b is loaded from %ptr_b, and the expected assembly
; uses (%rdi) directly as the first instruction operand.
define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_mullo_epi16_rm_128
;CHECK: vpmullw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0x07]
%b = load <8 x i16>, <8 x i16>* %ptr_b
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
ret <8 x i16> %res
}
define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
;CHECK-LABEL: test_mask_mullo_epi16_rmk_128
;CHECK: vpmullw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f]
%b = load <8 x i16>, <8 x i16>* %ptr_b
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
ret <8 x i16> %res
}
define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_mullo_epi16_rmkz_128
;CHECK: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07]
%b = load <8 x i16>, <8 x i16>* %ptr_b
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
; Masked 16-bit multiply, 256-bit: @llvm.x86.avx512.mask.pmull.w.256
; (expected instruction: vpmullw on ymm registers).
; rr takes both sources as arguments; rm loads the second source from %ptr_b.
; Suffix k passes %passThru and %mask, kz passes zeroinitializer and %mask;
; with neither, the call passes zeroinitializer and an all-ones mask (i16 -1).
define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
;CHECK-LABEL: test_mask_mullo_epi16_rr_256
;CHECK: vpmullw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0xc1]
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
ret <16 x i16> %res
}
define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi16_rrk_256
;CHECK: vpmullw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
ret <16 x i16> %res
}
define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi16_rrkz_256
;CHECK: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
ret <16 x i16> %res
}
; Memory-operand forms: %b is loaded from %ptr_b, and the expected assembly
; uses (%rdi) directly as the first instruction operand.
define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
;CHECK-LABEL: test_mask_mullo_epi16_rm_256
;CHECK: vpmullw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0x07]
%b = load <16 x i16>, <16 x i16>* %ptr_b
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
ret <16 x i16> %res
}
define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi16_rmk_256
;CHECK: vpmullw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f]
%b = load <16 x i16>, <16 x i16>* %ptr_b
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
ret <16 x i16> %res
}
define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_mullo_epi16_rmkz_256
;CHECK: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07]
%b = load <16 x i16>, <16 x i16>* %ptr_b
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)

File diff suppressed because it is too large Load Diff

View File

@ -127,3 +127,898 @@
// CHECK: kmovb %k3, %r13d
// CHECK: encoding: [0xc5,0x79,0x93,0xeb]
kmovb %k3, %r13d
// vandpd with 512-bit (zmm) operands. Each case is a triple: expected
// printed instruction, expected encoding bytes, then the input instruction.
// Register forms first: unmasked, merge-masked {%k5}, zero-masked {%k5} {z}.
// CHECK: vandpd %zmm27, %zmm28, %zmm19
// CHECK: encoding: [0x62,0x81,0x9d,0x40,0x54,0xdb]
vandpd %zmm27, %zmm28, %zmm19
// CHECK: vandpd %zmm27, %zmm28, %zmm19 {%k5}
// CHECK: encoding: [0x62,0x81,0x9d,0x45,0x54,0xdb]
vandpd %zmm27, %zmm28, %zmm19 {%k5}
// CHECK: vandpd %zmm27, %zmm28, %zmm19 {%k5} {z}
// CHECK: encoding: [0x62,0x81,0x9d,0xc5,0x54,0xdb]
vandpd %zmm27, %zmm28, %zmm19 {%k5} {z}
// Memory source operands: plain base, base+index*scale+disp, and the
// {1to8} broadcast form.
// CHECK: vandpd (%rcx), %zmm28, %zmm19
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x19]
vandpd (%rcx), %zmm28, %zmm19
// CHECK: vandpd 291(%rax,%r14,8), %zmm28, %zmm19
// CHECK: encoding: [0x62,0xa1,0x9d,0x40,0x54,0x9c,0xf0,0x23,0x01,0x00,0x00]
vandpd 291(%rax,%r14,8), %zmm28, %zmm19
// CHECK: vandpd (%rcx){1to8}, %zmm28, %zmm19
// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x19]
vandpd (%rcx){1to8}, %zmm28, %zmm19
// Displacement boundaries: per the expected encodings, 8128 and -8192 end in
// a single displacement byte (0x7f / 0x80), while 8192 and -8256 take four
// bytes. NOTE(review): presumably the EVEX compressed 8-bit displacement
// scaled by the 64-byte operand size - confirm against the ISA manual.
// CHECK: vandpd 8128(%rdx), %zmm28, %zmm19
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x5a,0x7f]
vandpd 8128(%rdx), %zmm28, %zmm19
// CHECK: vandpd 8192(%rdx), %zmm28, %zmm19
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x9a,0x00,0x20,0x00,0x00]
vandpd 8192(%rdx), %zmm28, %zmm19
// CHECK: vandpd -8192(%rdx), %zmm28, %zmm19
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x5a,0x80]
vandpd -8192(%rdx), %zmm28, %zmm19
// CHECK: vandpd -8256(%rdx), %zmm28, %zmm19
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x54,0x9a,0xc0,0xdf,0xff,0xff]
vandpd -8256(%rdx), %zmm28, %zmm19
// Same boundary pattern for the {1to8} broadcast form: 1016 and -1024 end in
// one displacement byte, 1024 and -1032 take four.
// CHECK: vandpd 1016(%rdx){1to8}, %zmm28, %zmm19
// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x5a,0x7f]
vandpd 1016(%rdx){1to8}, %zmm28, %zmm19
// CHECK: vandpd 1024(%rdx){1to8}, %zmm28, %zmm19
// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x9a,0x00,0x04,0x00,0x00]
vandpd 1024(%rdx){1to8}, %zmm28, %zmm19
// CHECK: vandpd -1024(%rdx){1to8}, %zmm28, %zmm19
// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x5a,0x80]
vandpd -1024(%rdx){1to8}, %zmm28, %zmm19
// CHECK: vandpd -1032(%rdx){1to8}, %zmm28, %zmm19
// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x54,0x9a,0xf8,0xfb,0xff,0xff]
vandpd -1032(%rdx){1to8}, %zmm28, %zmm19
// vandps, 512-bit forms: first source %zmm22, destination %zmm17.
// Cases: register source %zmm25; write-mask %k4 with and without {z};
// full-width and {1to16} memory sources. The displacement cases pair the last
// value whose expected bytes use a one-byte displacement with the first that
// needs four (8128/8192, -8192/-8256; for {1to16}: 508/512, -512/-516).
// CHECK: vandps %zmm25, %zmm22, %zmm17
// CHECK: encoding: [0x62,0x81,0x4c,0x40,0x54,0xc9]
          vandps %zmm25, %zmm22, %zmm17
// CHECK: vandps %zmm25, %zmm22, %zmm17 {%k4}
// CHECK: encoding: [0x62,0x81,0x4c,0x44,0x54,0xc9]
          vandps %zmm25, %zmm22, %zmm17 {%k4}
// CHECK: vandps %zmm25, %zmm22, %zmm17 {%k4} {z}
// CHECK: encoding: [0x62,0x81,0x4c,0xc4,0x54,0xc9]
          vandps %zmm25, %zmm22, %zmm17 {%k4} {z}
// CHECK: vandps (%rcx), %zmm22, %zmm17
// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x09]
          vandps (%rcx), %zmm22, %zmm17
// CHECK: vandps 291(%rax,%r14,8), %zmm22, %zmm17
// CHECK: encoding: [0x62,0xa1,0x4c,0x40,0x54,0x8c,0xf0,0x23,0x01,0x00,0x00]
          vandps 291(%rax,%r14,8), %zmm22, %zmm17
// CHECK: vandps (%rcx){1to16}, %zmm22, %zmm17
// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x09]
          vandps (%rcx){1to16}, %zmm22, %zmm17
// CHECK: vandps 8128(%rdx), %zmm22, %zmm17
// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x4a,0x7f]
          vandps 8128(%rdx), %zmm22, %zmm17
// CHECK: vandps 8192(%rdx), %zmm22, %zmm17
// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x8a,0x00,0x20,0x00,0x00]
          vandps 8192(%rdx), %zmm22, %zmm17
// CHECK: vandps -8192(%rdx), %zmm22, %zmm17
// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x4a,0x80]
          vandps -8192(%rdx), %zmm22, %zmm17
// CHECK: vandps -8256(%rdx), %zmm22, %zmm17
// CHECK: encoding: [0x62,0xe1,0x4c,0x40,0x54,0x8a,0xc0,0xdf,0xff,0xff]
          vandps -8256(%rdx), %zmm22, %zmm17
// CHECK: vandps 508(%rdx){1to16}, %zmm22, %zmm17
// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x4a,0x7f]
          vandps 508(%rdx){1to16}, %zmm22, %zmm17
// CHECK: vandps 512(%rdx){1to16}, %zmm22, %zmm17
// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x8a,0x00,0x02,0x00,0x00]
          vandps 512(%rdx){1to16}, %zmm22, %zmm17
// CHECK: vandps -512(%rdx){1to16}, %zmm22, %zmm17
// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x4a,0x80]
          vandps -512(%rdx){1to16}, %zmm22, %zmm17
// CHECK: vandps -516(%rdx){1to16}, %zmm22, %zmm17
// CHECK: encoding: [0x62,0xe1,0x4c,0x50,0x54,0x8a,0xfc,0xfd,0xff,0xff]
          vandps -516(%rdx){1to16}, %zmm22, %zmm17
// vandnpd, 512-bit forms: first source %zmm17, destination %zmm20.
// Cases: register source %zmm22; write-mask %k1 with and without {z};
// full-width and {1to8} memory sources. The displacement cases pair the last
// value whose expected bytes use a one-byte displacement with the first that
// needs four (8128/8192, -8192/-8256; for {1to8}: 1016/1024, -1024/-1032).
// CHECK: vandnpd %zmm22, %zmm17, %zmm20
// CHECK: encoding: [0x62,0xa1,0xf5,0x40,0x55,0xe6]
          vandnpd %zmm22, %zmm17, %zmm20
// CHECK: vandnpd %zmm22, %zmm17, %zmm20 {%k1}
// CHECK: encoding: [0x62,0xa1,0xf5,0x41,0x55,0xe6]
          vandnpd %zmm22, %zmm17, %zmm20 {%k1}
// CHECK: vandnpd %zmm22, %zmm17, %zmm20 {%k1} {z}
// CHECK: encoding: [0x62,0xa1,0xf5,0xc1,0x55,0xe6]
          vandnpd %zmm22, %zmm17, %zmm20 {%k1} {z}
// CHECK: vandnpd (%rcx), %zmm17, %zmm20
// CHECK: encoding: [0x62,0xe1,0xf5,0x40,0x55,0x21]
          vandnpd (%rcx), %zmm17, %zmm20
// CHECK: vandnpd 291(%rax,%r14,8), %zmm17, %zmm20
// CHECK: encoding: [0x62,0xa1,0xf5,0x40,0x55,0xa4,0xf0,0x23,0x01,0x00,0x00]
          vandnpd 291(%rax,%r14,8), %zmm17, %zmm20
// CHECK: vandnpd (%rcx){1to8}, %zmm17, %zmm20
// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0x21]
          vandnpd (%rcx){1to8}, %zmm17, %zmm20
// CHECK: vandnpd 8128(%rdx), %zmm17, %zmm20
// CHECK: encoding: [0x62,0xe1,0xf5,0x40,0x55,0x62,0x7f]
          vandnpd 8128(%rdx), %zmm17, %zmm20
// CHECK: vandnpd 8192(%rdx), %zmm17, %zmm20
// CHECK: encoding: [0x62,0xe1,0xf5,0x40,0x55,0xa2,0x00,0x20,0x00,0x00]
          vandnpd 8192(%rdx), %zmm17, %zmm20
// CHECK: vandnpd -8192(%rdx), %zmm17, %zmm20
// CHECK: encoding: [0x62,0xe1,0xf5,0x40,0x55,0x62,0x80]
          vandnpd -8192(%rdx), %zmm17, %zmm20
// CHECK: vandnpd -8256(%rdx), %zmm17, %zmm20
// CHECK: encoding: [0x62,0xe1,0xf5,0x40,0x55,0xa2,0xc0,0xdf,0xff,0xff]
          vandnpd -8256(%rdx), %zmm17, %zmm20
// CHECK: vandnpd 1016(%rdx){1to8}, %zmm17, %zmm20
// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0x62,0x7f]
          vandnpd 1016(%rdx){1to8}, %zmm17, %zmm20
// CHECK: vandnpd 1024(%rdx){1to8}, %zmm17, %zmm20
// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0xa2,0x00,0x04,0x00,0x00]
          vandnpd 1024(%rdx){1to8}, %zmm17, %zmm20
// CHECK: vandnpd -1024(%rdx){1to8}, %zmm17, %zmm20
// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0x62,0x80]
          vandnpd -1024(%rdx){1to8}, %zmm17, %zmm20
// CHECK: vandnpd -1032(%rdx){1to8}, %zmm17, %zmm20
// CHECK: encoding: [0x62,0xe1,0xf5,0x50,0x55,0xa2,0xf8,0xfb,0xff,0xff]
          vandnpd -1032(%rdx){1to8}, %zmm17, %zmm20
// vandnps, 512-bit forms: first source %zmm17, destination %zmm22.
// Cases: register source %zmm19; write-mask %k2 with and without {z};
// full-width and {1to16} memory sources. The displacement cases pair the last
// value whose expected bytes use a one-byte displacement with the first that
// needs four (8128/8192, -8192/-8256; for {1to16}: 508/512, -512/-516).
// CHECK: vandnps %zmm19, %zmm17, %zmm22
// CHECK: encoding: [0x62,0xa1,0x74,0x40,0x55,0xf3]
          vandnps %zmm19, %zmm17, %zmm22
// CHECK: vandnps %zmm19, %zmm17, %zmm22 {%k2}
// CHECK: encoding: [0x62,0xa1,0x74,0x42,0x55,0xf3]
          vandnps %zmm19, %zmm17, %zmm22 {%k2}
// CHECK: vandnps %zmm19, %zmm17, %zmm22 {%k2} {z}
// CHECK: encoding: [0x62,0xa1,0x74,0xc2,0x55,0xf3]
          vandnps %zmm19, %zmm17, %zmm22 {%k2} {z}
// CHECK: vandnps (%rcx), %zmm17, %zmm22
// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0x31]
          vandnps (%rcx), %zmm17, %zmm22
// CHECK: vandnps 291(%rax,%r14,8), %zmm17, %zmm22
// CHECK: encoding: [0x62,0xa1,0x74,0x40,0x55,0xb4,0xf0,0x23,0x01,0x00,0x00]
          vandnps 291(%rax,%r14,8), %zmm17, %zmm22
// CHECK: vandnps (%rcx){1to16}, %zmm17, %zmm22
// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0x31]
          vandnps (%rcx){1to16}, %zmm17, %zmm22
// CHECK: vandnps 8128(%rdx), %zmm17, %zmm22
// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0x72,0x7f]
          vandnps 8128(%rdx), %zmm17, %zmm22
// CHECK: vandnps 8192(%rdx), %zmm17, %zmm22
// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0xb2,0x00,0x20,0x00,0x00]
          vandnps 8192(%rdx), %zmm17, %zmm22
// CHECK: vandnps -8192(%rdx), %zmm17, %zmm22
// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0x72,0x80]
          vandnps -8192(%rdx), %zmm17, %zmm22
// CHECK: vandnps -8256(%rdx), %zmm17, %zmm22
// CHECK: encoding: [0x62,0xe1,0x74,0x40,0x55,0xb2,0xc0,0xdf,0xff,0xff]
          vandnps -8256(%rdx), %zmm17, %zmm22
// CHECK: vandnps 508(%rdx){1to16}, %zmm17, %zmm22
// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0x72,0x7f]
          vandnps 508(%rdx){1to16}, %zmm17, %zmm22
// CHECK: vandnps 512(%rdx){1to16}, %zmm17, %zmm22
// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0xb2,0x00,0x02,0x00,0x00]
          vandnps 512(%rdx){1to16}, %zmm17, %zmm22
// CHECK: vandnps -512(%rdx){1to16}, %zmm17, %zmm22
// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0x72,0x80]
          vandnps -512(%rdx){1to16}, %zmm17, %zmm22
// CHECK: vandnps -516(%rdx){1to16}, %zmm17, %zmm22
// CHECK: encoding: [0x62,0xe1,0x74,0x50,0x55,0xb2,0xfc,0xfd,0xff,0xff]
          vandnps -516(%rdx){1to16}, %zmm17, %zmm22
// vorpd, 512-bit forms: first source %zmm22, destination %zmm30.
// Cases: register source %zmm21; write-mask %k6 with and without {z};
// full-width and {1to8} memory sources. The displacement cases pair the last
// value whose expected bytes use a one-byte displacement with the first that
// needs four (8128/8192, -8192/-8256; for {1to8}: 1016/1024, -1024/-1032).
// CHECK: vorpd %zmm21, %zmm22, %zmm30
// CHECK: encoding: [0x62,0x21,0xcd,0x40,0x56,0xf5]
          vorpd %zmm21, %zmm22, %zmm30
// CHECK: vorpd %zmm21, %zmm22, %zmm30 {%k6}
// CHECK: encoding: [0x62,0x21,0xcd,0x46,0x56,0xf5]
          vorpd %zmm21, %zmm22, %zmm30 {%k6}
// CHECK: vorpd %zmm21, %zmm22, %zmm30 {%k6} {z}
// CHECK: encoding: [0x62,0x21,0xcd,0xc6,0x56,0xf5]
          vorpd %zmm21, %zmm22, %zmm30 {%k6} {z}
// CHECK: vorpd (%rcx), %zmm22, %zmm30
// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0x31]
          vorpd (%rcx), %zmm22, %zmm30
// CHECK: vorpd 291(%rax,%r14,8), %zmm22, %zmm30
// CHECK: encoding: [0x62,0x21,0xcd,0x40,0x56,0xb4,0xf0,0x23,0x01,0x00,0x00]
          vorpd 291(%rax,%r14,8), %zmm22, %zmm30
// CHECK: vorpd (%rcx){1to8}, %zmm22, %zmm30
// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0x31]
          vorpd (%rcx){1to8}, %zmm22, %zmm30
// CHECK: vorpd 8128(%rdx), %zmm22, %zmm30
// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0x72,0x7f]
          vorpd 8128(%rdx), %zmm22, %zmm30
// CHECK: vorpd 8192(%rdx), %zmm22, %zmm30
// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0xb2,0x00,0x20,0x00,0x00]
          vorpd 8192(%rdx), %zmm22, %zmm30
// CHECK: vorpd -8192(%rdx), %zmm22, %zmm30
// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0x72,0x80]
          vorpd -8192(%rdx), %zmm22, %zmm30
// CHECK: vorpd -8256(%rdx), %zmm22, %zmm30
// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x56,0xb2,0xc0,0xdf,0xff,0xff]
          vorpd -8256(%rdx), %zmm22, %zmm30
// CHECK: vorpd 1016(%rdx){1to8}, %zmm22, %zmm30
// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0x72,0x7f]
          vorpd 1016(%rdx){1to8}, %zmm22, %zmm30
// CHECK: vorpd 1024(%rdx){1to8}, %zmm22, %zmm30
// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0xb2,0x00,0x04,0x00,0x00]
          vorpd 1024(%rdx){1to8}, %zmm22, %zmm30
// CHECK: vorpd -1024(%rdx){1to8}, %zmm22, %zmm30
// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0x72,0x80]
          vorpd -1024(%rdx){1to8}, %zmm22, %zmm30
// CHECK: vorpd -1032(%rdx){1to8}, %zmm22, %zmm30
// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x56,0xb2,0xf8,0xfb,0xff,0xff]
          vorpd -1032(%rdx){1to8}, %zmm22, %zmm30
// vorps, 512-bit forms: first source %zmm21, destination %zmm22.
// Cases: register source %zmm26; write-mask %k7 with and without {z};
// full-width and {1to16} memory sources. The displacement cases pair the last
// value whose expected bytes use a one-byte displacement with the first that
// needs four (8128/8192, -8192/-8256; for {1to16}: 508/512, -512/-516).
// CHECK: vorps %zmm26, %zmm21, %zmm22
// CHECK: encoding: [0x62,0x81,0x54,0x40,0x56,0xf2]
          vorps %zmm26, %zmm21, %zmm22
// CHECK: vorps %zmm26, %zmm21, %zmm22 {%k7}
// CHECK: encoding: [0x62,0x81,0x54,0x47,0x56,0xf2]
          vorps %zmm26, %zmm21, %zmm22 {%k7}
// CHECK: vorps %zmm26, %zmm21, %zmm22 {%k7} {z}
// CHECK: encoding: [0x62,0x81,0x54,0xc7,0x56,0xf2]
          vorps %zmm26, %zmm21, %zmm22 {%k7} {z}
// CHECK: vorps (%rcx), %zmm21, %zmm22
// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0x31]
          vorps (%rcx), %zmm21, %zmm22
// CHECK: vorps 291(%rax,%r14,8), %zmm21, %zmm22
// CHECK: encoding: [0x62,0xa1,0x54,0x40,0x56,0xb4,0xf0,0x23,0x01,0x00,0x00]
          vorps 291(%rax,%r14,8), %zmm21, %zmm22
// CHECK: vorps (%rcx){1to16}, %zmm21, %zmm22
// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0x31]
          vorps (%rcx){1to16}, %zmm21, %zmm22
// CHECK: vorps 8128(%rdx), %zmm21, %zmm22
// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0x72,0x7f]
          vorps 8128(%rdx), %zmm21, %zmm22
// CHECK: vorps 8192(%rdx), %zmm21, %zmm22
// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0xb2,0x00,0x20,0x00,0x00]
          vorps 8192(%rdx), %zmm21, %zmm22
// CHECK: vorps -8192(%rdx), %zmm21, %zmm22
// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0x72,0x80]
          vorps -8192(%rdx), %zmm21, %zmm22
// CHECK: vorps -8256(%rdx), %zmm21, %zmm22
// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x56,0xb2,0xc0,0xdf,0xff,0xff]
          vorps -8256(%rdx), %zmm21, %zmm22
// CHECK: vorps 508(%rdx){1to16}, %zmm21, %zmm22
// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0x72,0x7f]
          vorps 508(%rdx){1to16}, %zmm21, %zmm22
// CHECK: vorps 512(%rdx){1to16}, %zmm21, %zmm22
// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0xb2,0x00,0x02,0x00,0x00]
          vorps 512(%rdx){1to16}, %zmm21, %zmm22
// CHECK: vorps -512(%rdx){1to16}, %zmm21, %zmm22
// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0x72,0x80]
          vorps -512(%rdx){1to16}, %zmm21, %zmm22
// CHECK: vorps -516(%rdx){1to16}, %zmm21, %zmm22
// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x56,0xb2,0xfc,0xfd,0xff,0xff]
          vorps -516(%rdx){1to16}, %zmm21, %zmm22
// vxorpd, 512-bit forms: first source %zmm24, destination %zmm27.
// Cases: register source %zmm24 (same register as the first source);
// write-mask %k5 with and without {z}; full-width and {1to8} memory sources.
// The displacement cases pair the last value whose expected bytes use a
// one-byte displacement with the first that needs four (8128/8192,
// -8192/-8256; for {1to8}: 1016/1024, -1024/-1032).
// CHECK: vxorpd %zmm24, %zmm24, %zmm27
// CHECK: encoding: [0x62,0x01,0xbd,0x40,0x57,0xd8]
          vxorpd %zmm24, %zmm24, %zmm27
// CHECK: vxorpd %zmm24, %zmm24, %zmm27 {%k5}
// CHECK: encoding: [0x62,0x01,0xbd,0x45,0x57,0xd8]
          vxorpd %zmm24, %zmm24, %zmm27 {%k5}
// CHECK: vxorpd %zmm24, %zmm24, %zmm27 {%k5} {z}
// CHECK: encoding: [0x62,0x01,0xbd,0xc5,0x57,0xd8]
          vxorpd %zmm24, %zmm24, %zmm27 {%k5} {z}
// CHECK: vxorpd (%rcx), %zmm24, %zmm27
// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x19]
          vxorpd (%rcx), %zmm24, %zmm27
// CHECK: vxorpd 291(%rax,%r14,8), %zmm24, %zmm27
// CHECK: encoding: [0x62,0x21,0xbd,0x40,0x57,0x9c,0xf0,0x23,0x01,0x00,0x00]
          vxorpd 291(%rax,%r14,8), %zmm24, %zmm27
// CHECK: vxorpd (%rcx){1to8}, %zmm24, %zmm27
// CHECK: encoding: [0x62,0x61,0xbd,0x50,0x57,0x19]
          vxorpd (%rcx){1to8}, %zmm24, %zmm27
// CHECK: vxorpd 8128(%rdx), %zmm24, %zmm27
// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x5a,0x7f]
          vxorpd 8128(%rdx), %zmm24, %zmm27
// CHECK: vxorpd 8192(%rdx), %zmm24, %zmm27
// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x9a,0x00,0x20,0x00,0x00]
          vxorpd 8192(%rdx), %zmm24, %zmm27
// CHECK: vxorpd -8192(%rdx), %zmm24, %zmm27
// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x5a,0x80]
          vxorpd -8192(%rdx), %zmm24, %zmm27
// CHECK: vxorpd -8256(%rdx), %zmm24, %zmm27
// CHECK: encoding: [0x62,0x61,0xbd,0x40,0x57,0x9a,0xc0,0xdf,0xff,0xff]
          vxorpd -8256(%rdx), %zmm24, %zmm27
// CHECK: vxorpd 1016(%rdx){1to8}, %zmm24, %zmm27
// CHECK: encoding: [0x62,0x61,0xbd,0x50,0x57,0x5a,0x7f]
          vxorpd 1016(%rdx){1to8}, %zmm24, %zmm27
// CHECK: vxorpd 1024(%rdx){1to8}, %zmm24, %zmm27
// CHECK: encoding: [0x62,0x61,0xbd,0x50,0x57,0x9a,0x00,0x04,0x00,0x00]
          vxorpd 1024(%rdx){1to8}, %zmm24, %zmm27
// CHECK: vxorpd -1024(%rdx){1to8}, %zmm24, %zmm27
// CHECK: encoding: [0x62,0x61,0xbd,0x50,0x57,0x5a,0x80]
          vxorpd -1024(%rdx){1to8}, %zmm24, %zmm27
// CHECK: vxorpd -1032(%rdx){1to8}, %zmm24, %zmm27
// CHECK: encoding: [0x62,0x61,0xbd,0x50,0x57,0x9a,0xf8,0xfb,0xff,0xff]
          vxorpd -1032(%rdx){1to8}, %zmm24, %zmm27
// vxorps, 512-bit forms: %zmm18 is both the first source and the destination.
// Cases: register source %zmm19; write-mask %k2 with and without {z};
// full-width and {1to16} memory sources. The displacement cases pair the last
// value whose expected bytes use a one-byte displacement with the first that
// needs four (8128/8192, -8192/-8256; for {1to16}: 508/512, -512/-516).
// CHECK: vxorps %zmm19, %zmm18, %zmm18
// CHECK: encoding: [0x62,0xa1,0x6c,0x40,0x57,0xd3]
          vxorps %zmm19, %zmm18, %zmm18
// CHECK: vxorps %zmm19, %zmm18, %zmm18 {%k2}
// CHECK: encoding: [0x62,0xa1,0x6c,0x42,0x57,0xd3]
          vxorps %zmm19, %zmm18, %zmm18 {%k2}
// CHECK: vxorps %zmm19, %zmm18, %zmm18 {%k2} {z}
// CHECK: encoding: [0x62,0xa1,0x6c,0xc2,0x57,0xd3]
          vxorps %zmm19, %zmm18, %zmm18 {%k2} {z}
// CHECK: vxorps (%rcx), %zmm18, %zmm18
// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x11]
          vxorps (%rcx), %zmm18, %zmm18
// CHECK: vxorps 291(%rax,%r14,8), %zmm18, %zmm18
// CHECK: encoding: [0x62,0xa1,0x6c,0x40,0x57,0x94,0xf0,0x23,0x01,0x00,0x00]
          vxorps 291(%rax,%r14,8), %zmm18, %zmm18
// CHECK: vxorps (%rcx){1to16}, %zmm18, %zmm18
// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x11]
          vxorps (%rcx){1to16}, %zmm18, %zmm18
// CHECK: vxorps 8128(%rdx), %zmm18, %zmm18
// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x52,0x7f]
          vxorps 8128(%rdx), %zmm18, %zmm18
// CHECK: vxorps 8192(%rdx), %zmm18, %zmm18
// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x92,0x00,0x20,0x00,0x00]
          vxorps 8192(%rdx), %zmm18, %zmm18
// CHECK: vxorps -8192(%rdx), %zmm18, %zmm18
// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x52,0x80]
          vxorps -8192(%rdx), %zmm18, %zmm18
// CHECK: vxorps -8256(%rdx), %zmm18, %zmm18
// CHECK: encoding: [0x62,0xe1,0x6c,0x40,0x57,0x92,0xc0,0xdf,0xff,0xff]
          vxorps -8256(%rdx), %zmm18, %zmm18
// CHECK: vxorps 508(%rdx){1to16}, %zmm18, %zmm18
// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x52,0x7f]
          vxorps 508(%rdx){1to16}, %zmm18, %zmm18
// CHECK: vxorps 512(%rdx){1to16}, %zmm18, %zmm18
// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x92,0x00,0x02,0x00,0x00]
          vxorps 512(%rdx){1to16}, %zmm18, %zmm18
// CHECK: vxorps -512(%rdx){1to16}, %zmm18, %zmm18
// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x52,0x80]
          vxorps -512(%rdx){1to16}, %zmm18, %zmm18
// CHECK: vxorps -516(%rdx){1to16}, %zmm18, %zmm18
// CHECK: encoding: [0x62,0xe1,0x6c,0x50,0x57,0x92,0xfc,0xfd,0xff,0xff]
          vxorps -516(%rdx){1to16}, %zmm18, %zmm18
// Second vandpd set, 512-bit forms: first source %zmm22, destination %zmm24.
// Cases: register source %zmm22 (same register as the first source);
// write-mask %k4 with and without {z}; full-width and {1to8} memory sources;
// the indexed form uses displacement 4660. The remaining displacement cases
// pair the last value whose expected bytes use a one-byte displacement with
// the first that needs four (8128/8192, -8192/-8256; for {1to8}: 1016/1024,
// -1024/-1032).
// CHECK: vandpd %zmm22, %zmm22, %zmm24
// CHECK: encoding: [0x62,0x21,0xcd,0x40,0x54,0xc6]
          vandpd %zmm22, %zmm22, %zmm24
// CHECK: vandpd %zmm22, %zmm22, %zmm24 {%k4}
// CHECK: encoding: [0x62,0x21,0xcd,0x44,0x54,0xc6]
          vandpd %zmm22, %zmm22, %zmm24 {%k4}
// CHECK: vandpd %zmm22, %zmm22, %zmm24 {%k4} {z}
// CHECK: encoding: [0x62,0x21,0xcd,0xc4,0x54,0xc6]
          vandpd %zmm22, %zmm22, %zmm24 {%k4} {z}
// CHECK: vandpd (%rcx), %zmm22, %zmm24
// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x01]
          vandpd (%rcx), %zmm22, %zmm24
// CHECK: vandpd 4660(%rax,%r14,8), %zmm22, %zmm24
// CHECK: encoding: [0x62,0x21,0xcd,0x40,0x54,0x84,0xf0,0x34,0x12,0x00,0x00]
          vandpd 4660(%rax,%r14,8), %zmm22, %zmm24
// CHECK: vandpd (%rcx){1to8}, %zmm22, %zmm24
// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x01]
          vandpd (%rcx){1to8}, %zmm22, %zmm24
// CHECK: vandpd 8128(%rdx), %zmm22, %zmm24
// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x42,0x7f]
          vandpd 8128(%rdx), %zmm22, %zmm24
// CHECK: vandpd 8192(%rdx), %zmm22, %zmm24
// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x82,0x00,0x20,0x00,0x00]
          vandpd 8192(%rdx), %zmm22, %zmm24
// CHECK: vandpd -8192(%rdx), %zmm22, %zmm24
// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x42,0x80]
          vandpd -8192(%rdx), %zmm22, %zmm24
// CHECK: vandpd -8256(%rdx), %zmm22, %zmm24
// CHECK: encoding: [0x62,0x61,0xcd,0x40,0x54,0x82,0xc0,0xdf,0xff,0xff]
          vandpd -8256(%rdx), %zmm22, %zmm24
// CHECK: vandpd 1016(%rdx){1to8}, %zmm22, %zmm24
// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x42,0x7f]
          vandpd 1016(%rdx){1to8}, %zmm22, %zmm24
// CHECK: vandpd 1024(%rdx){1to8}, %zmm22, %zmm24
// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x82,0x00,0x04,0x00,0x00]
          vandpd 1024(%rdx){1to8}, %zmm22, %zmm24
// CHECK: vandpd -1024(%rdx){1to8}, %zmm22, %zmm24
// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x42,0x80]
          vandpd -1024(%rdx){1to8}, %zmm22, %zmm24
// CHECK: vandpd -1032(%rdx){1to8}, %zmm22, %zmm24
// CHECK: encoding: [0x62,0x61,0xcd,0x50,0x54,0x82,0xf8,0xfb,0xff,0xff]
          vandpd -1032(%rdx){1to8}, %zmm22, %zmm24
// Second vandps set, 512-bit forms: first source %zmm23, destination %zmm30.
// Cases: register source %zmm23 (same register as the first source);
// write-mask %k5 with and without {z}; full-width and {1to16} memory sources;
// the indexed form uses displacement 4660. The remaining displacement cases
// pair the last value whose expected bytes use a one-byte displacement with
// the first that needs four (8128/8192, -8192/-8256; for {1to16}: 508/512,
// -512/-516).
// CHECK: vandps %zmm23, %zmm23, %zmm30
// CHECK: encoding: [0x62,0x21,0x44,0x40,0x54,0xf7]
          vandps %zmm23, %zmm23, %zmm30
// CHECK: vandps %zmm23, %zmm23, %zmm30 {%k5}
// CHECK: encoding: [0x62,0x21,0x44,0x45,0x54,0xf7]
          vandps %zmm23, %zmm23, %zmm30 {%k5}
// CHECK: vandps %zmm23, %zmm23, %zmm30 {%k5} {z}
// CHECK: encoding: [0x62,0x21,0x44,0xc5,0x54,0xf7]
          vandps %zmm23, %zmm23, %zmm30 {%k5} {z}
// CHECK: vandps (%rcx), %zmm23, %zmm30
// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0x31]
          vandps (%rcx), %zmm23, %zmm30
// CHECK: vandps 4660(%rax,%r14,8), %zmm23, %zmm30
// CHECK: encoding: [0x62,0x21,0x44,0x40,0x54,0xb4,0xf0,0x34,0x12,0x00,0x00]
          vandps 4660(%rax,%r14,8), %zmm23, %zmm30
// CHECK: vandps (%rcx){1to16}, %zmm23, %zmm30
// CHECK: encoding: [0x62,0x61,0x44,0x50,0x54,0x31]
          vandps (%rcx){1to16}, %zmm23, %zmm30
// CHECK: vandps 8128(%rdx), %zmm23, %zmm30
// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0x72,0x7f]
          vandps 8128(%rdx), %zmm23, %zmm30
// CHECK: vandps 8192(%rdx), %zmm23, %zmm30
// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0xb2,0x00,0x20,0x00,0x00]
          vandps 8192(%rdx), %zmm23, %zmm30
// CHECK: vandps -8192(%rdx), %zmm23, %zmm30
// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0x72,0x80]
          vandps -8192(%rdx), %zmm23, %zmm30
// CHECK: vandps -8256(%rdx), %zmm23, %zmm30
// CHECK: encoding: [0x62,0x61,0x44,0x40,0x54,0xb2,0xc0,0xdf,0xff,0xff]
          vandps -8256(%rdx), %zmm23, %zmm30
// CHECK: vandps 508(%rdx){1to16}, %zmm23, %zmm30
// CHECK: encoding: [0x62,0x61,0x44,0x50,0x54,0x72,0x7f]
          vandps 508(%rdx){1to16}, %zmm23, %zmm30
// CHECK: vandps 512(%rdx){1to16}, %zmm23, %zmm30
// CHECK: encoding: [0x62,0x61,0x44,0x50,0x54,0xb2,0x00,0x02,0x00,0x00]
          vandps 512(%rdx){1to16}, %zmm23, %zmm30
// CHECK: vandps -512(%rdx){1to16}, %zmm23, %zmm30
// CHECK: encoding: [0x62,0x61,0x44,0x50,0x54,0x72,0x80]
          vandps -512(%rdx){1to16}, %zmm23, %zmm30
// CHECK: vandps -516(%rdx){1to16}, %zmm23, %zmm30
// CHECK: encoding: [0x62,0x61,0x44,0x50,0x54,0xb2,0xfc,0xfd,0xff,0xff]
          vandps -516(%rdx){1to16}, %zmm23, %zmm30
// Second vandnpd set, 512-bit forms: first source %zmm21, destination %zmm25.
// Cases: register source %zmm21 (same register as the first source);
// write-mask %k2 with and without {z}; full-width and {1to8} memory sources;
// the indexed form uses displacement 4660. The remaining displacement cases
// pair the last value whose expected bytes use a one-byte displacement with
// the first that needs four (8128/8192, -8192/-8256; for {1to8}: 1016/1024,
// -1024/-1032).
// CHECK: vandnpd %zmm21, %zmm21, %zmm25
// CHECK: encoding: [0x62,0x21,0xd5,0x40,0x55,0xcd]
          vandnpd %zmm21, %zmm21, %zmm25
// CHECK: vandnpd %zmm21, %zmm21, %zmm25 {%k2}
// CHECK: encoding: [0x62,0x21,0xd5,0x42,0x55,0xcd]
          vandnpd %zmm21, %zmm21, %zmm25 {%k2}
// CHECK: vandnpd %zmm21, %zmm21, %zmm25 {%k2} {z}
// CHECK: encoding: [0x62,0x21,0xd5,0xc2,0x55,0xcd]
          vandnpd %zmm21, %zmm21, %zmm25 {%k2} {z}
// CHECK: vandnpd (%rcx), %zmm21, %zmm25
// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x09]
          vandnpd (%rcx), %zmm21, %zmm25
// CHECK: vandnpd 4660(%rax,%r14,8), %zmm21, %zmm25
// CHECK: encoding: [0x62,0x21,0xd5,0x40,0x55,0x8c,0xf0,0x34,0x12,0x00,0x00]
          vandnpd 4660(%rax,%r14,8), %zmm21, %zmm25
// CHECK: vandnpd (%rcx){1to8}, %zmm21, %zmm25
// CHECK: encoding: [0x62,0x61,0xd5,0x50,0x55,0x09]
          vandnpd (%rcx){1to8}, %zmm21, %zmm25
// CHECK: vandnpd 8128(%rdx), %zmm21, %zmm25
// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x4a,0x7f]
          vandnpd 8128(%rdx), %zmm21, %zmm25
// CHECK: vandnpd 8192(%rdx), %zmm21, %zmm25
// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x8a,0x00,0x20,0x00,0x00]
          vandnpd 8192(%rdx), %zmm21, %zmm25
// CHECK: vandnpd -8192(%rdx), %zmm21, %zmm25
// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x4a,0x80]
          vandnpd -8192(%rdx), %zmm21, %zmm25
// CHECK: vandnpd -8256(%rdx), %zmm21, %zmm25
// CHECK: encoding: [0x62,0x61,0xd5,0x40,0x55,0x8a,0xc0,0xdf,0xff,0xff]
          vandnpd -8256(%rdx), %zmm21, %zmm25
// CHECK: vandnpd 1016(%rdx){1to8}, %zmm21, %zmm25
// CHECK: encoding: [0x62,0x61,0xd5,0x50,0x55,0x4a,0x7f]
          vandnpd 1016(%rdx){1to8}, %zmm21, %zmm25
// CHECK: vandnpd 1024(%rdx){1to8}, %zmm21, %zmm25
// CHECK: encoding: [0x62,0x61,0xd5,0x50,0x55,0x8a,0x00,0x04,0x00,0x00]
          vandnpd 1024(%rdx){1to8}, %zmm21, %zmm25
// CHECK: vandnpd -1024(%rdx){1to8}, %zmm21, %zmm25
// CHECK: encoding: [0x62,0x61,0xd5,0x50,0x55,0x4a,0x80]
          vandnpd -1024(%rdx){1to8}, %zmm21, %zmm25
// CHECK: vandnpd -1032(%rdx){1to8}, %zmm21, %zmm25
// CHECK: encoding: [0x62,0x61,0xd5,0x50,0x55,0x8a,0xf8,0xfb,0xff,0xff]
          vandnpd -1032(%rdx){1to8}, %zmm21, %zmm25
// Second vandnps set, 512-bit forms: first source %zmm21, destination %zmm17.
// Cases: register source %zmm18; write-mask %k1 with and without {z};
// full-width and {1to16} memory sources; the indexed form uses displacement
// 4660. The remaining displacement cases pair the last value whose expected
// bytes use a one-byte displacement with the first that needs four
// (8128/8192, -8192/-8256; for {1to16}: 508/512, -512/-516).
// CHECK: vandnps %zmm18, %zmm21, %zmm17
// CHECK: encoding: [0x62,0xa1,0x54,0x40,0x55,0xca]
          vandnps %zmm18, %zmm21, %zmm17
// CHECK: vandnps %zmm18, %zmm21, %zmm17 {%k1}
// CHECK: encoding: [0x62,0xa1,0x54,0x41,0x55,0xca]
          vandnps %zmm18, %zmm21, %zmm17 {%k1}
// CHECK: vandnps %zmm18, %zmm21, %zmm17 {%k1} {z}
// CHECK: encoding: [0x62,0xa1,0x54,0xc1,0x55,0xca]
          vandnps %zmm18, %zmm21, %zmm17 {%k1} {z}
// CHECK: vandnps (%rcx), %zmm21, %zmm17
// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x55,0x09]
          vandnps (%rcx), %zmm21, %zmm17
// CHECK: vandnps 4660(%rax,%r14,8), %zmm21, %zmm17
// CHECK: encoding: [0x62,0xa1,0x54,0x40,0x55,0x8c,0xf0,0x34,0x12,0x00,0x00]
          vandnps 4660(%rax,%r14,8), %zmm21, %zmm17
// CHECK: vandnps (%rcx){1to16}, %zmm21, %zmm17
// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x09]
          vandnps (%rcx){1to16}, %zmm21, %zmm17
// CHECK: vandnps 8128(%rdx), %zmm21, %zmm17
// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x55,0x4a,0x7f]
          vandnps 8128(%rdx), %zmm21, %zmm17
// CHECK: vandnps 8192(%rdx), %zmm21, %zmm17
// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x55,0x8a,0x00,0x20,0x00,0x00]
          vandnps 8192(%rdx), %zmm21, %zmm17
// CHECK: vandnps -8192(%rdx), %zmm21, %zmm17
// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x55,0x4a,0x80]
          vandnps -8192(%rdx), %zmm21, %zmm17
// CHECK: vandnps -8256(%rdx), %zmm21, %zmm17
// CHECK: encoding: [0x62,0xe1,0x54,0x40,0x55,0x8a,0xc0,0xdf,0xff,0xff]
          vandnps -8256(%rdx), %zmm21, %zmm17
// CHECK: vandnps 508(%rdx){1to16}, %zmm21, %zmm17
// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x4a,0x7f]
          vandnps 508(%rdx){1to16}, %zmm21, %zmm17
// CHECK: vandnps 512(%rdx){1to16}, %zmm21, %zmm17
// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x8a,0x00,0x02,0x00,0x00]
          vandnps 512(%rdx){1to16}, %zmm21, %zmm17
// CHECK: vandnps -512(%rdx){1to16}, %zmm21, %zmm17
// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x4a,0x80]
          vandnps -512(%rdx){1to16}, %zmm21, %zmm17
// CHECK: vandnps -516(%rdx){1to16}, %zmm21, %zmm17
// CHECK: encoding: [0x62,0xe1,0x54,0x50,0x55,0x8a,0xfc,0xfd,0xff,0xff]
          vandnps -516(%rdx){1to16}, %zmm21, %zmm17
// Second vorpd set, 512-bit forms: first source %zmm28, destination %zmm18.
// Cases: register source %zmm24; write-mask %k1 with and without {z};
// full-width and {1to8} memory sources; the indexed form uses displacement
// 4660. The remaining displacement cases pair the last value whose expected
// bytes use a one-byte displacement with the first that needs four
// (8128/8192, -8192/-8256; for {1to8}: 1016/1024, -1024/-1032).
// CHECK: vorpd %zmm24, %zmm28, %zmm18
// CHECK: encoding: [0x62,0x81,0x9d,0x40,0x56,0xd0]
          vorpd %zmm24, %zmm28, %zmm18
// CHECK: vorpd %zmm24, %zmm28, %zmm18 {%k1}
// CHECK: encoding: [0x62,0x81,0x9d,0x41,0x56,0xd0]
          vorpd %zmm24, %zmm28, %zmm18 {%k1}
// CHECK: vorpd %zmm24, %zmm28, %zmm18 {%k1} {z}
// CHECK: encoding: [0x62,0x81,0x9d,0xc1,0x56,0xd0]
          vorpd %zmm24, %zmm28, %zmm18 {%k1} {z}
// CHECK: vorpd (%rcx), %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x11]
          vorpd (%rcx), %zmm28, %zmm18
// CHECK: vorpd 4660(%rax,%r14,8), %zmm28, %zmm18
// CHECK: encoding: [0x62,0xa1,0x9d,0x40,0x56,0x94,0xf0,0x34,0x12,0x00,0x00]
          vorpd 4660(%rax,%r14,8), %zmm28, %zmm18
// CHECK: vorpd (%rcx){1to8}, %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x11]
          vorpd (%rcx){1to8}, %zmm28, %zmm18
// CHECK: vorpd 8128(%rdx), %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x52,0x7f]
          vorpd 8128(%rdx), %zmm28, %zmm18
// CHECK: vorpd 8192(%rdx), %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x92,0x00,0x20,0x00,0x00]
          vorpd 8192(%rdx), %zmm28, %zmm18
// CHECK: vorpd -8192(%rdx), %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x52,0x80]
          vorpd -8192(%rdx), %zmm28, %zmm18
// CHECK: vorpd -8256(%rdx), %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x40,0x56,0x92,0xc0,0xdf,0xff,0xff]
          vorpd -8256(%rdx), %zmm28, %zmm18
// CHECK: vorpd 1016(%rdx){1to8}, %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x52,0x7f]
          vorpd 1016(%rdx){1to8}, %zmm28, %zmm18
// CHECK: vorpd 1024(%rdx){1to8}, %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x92,0x00,0x04,0x00,0x00]
          vorpd 1024(%rdx){1to8}, %zmm28, %zmm18
// CHECK: vorpd -1024(%rdx){1to8}, %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x52,0x80]
          vorpd -1024(%rdx){1to8}, %zmm28, %zmm18
// CHECK: vorpd -1032(%rdx){1to8}, %zmm28, %zmm18
// CHECK: encoding: [0x62,0xe1,0x9d,0x50,0x56,0x92,0xf8,0xfb,0xff,0xff]
          vorpd -1032(%rdx){1to8}, %zmm28, %zmm18
// Second vorps set, 512-bit forms: first source %zmm17, destination %zmm28.
// Cases: register source %zmm23; write-mask %k7 with and without {z};
// full-width and {1to16} memory sources; the indexed form uses displacement
// 4660. The remaining displacement cases pair the last value whose expected
// bytes use a one-byte displacement with the first that needs four
// (8128/8192, -8192/-8256; for {1to16}: 508/512, -512/-516).
// CHECK: vorps %zmm23, %zmm17, %zmm28
// CHECK: encoding: [0x62,0x21,0x74,0x40,0x56,0xe7]
          vorps %zmm23, %zmm17, %zmm28
// CHECK: vorps %zmm23, %zmm17, %zmm28 {%k7}
// CHECK: encoding: [0x62,0x21,0x74,0x47,0x56,0xe7]
          vorps %zmm23, %zmm17, %zmm28 {%k7}
// CHECK: vorps %zmm23, %zmm17, %zmm28 {%k7} {z}
// CHECK: encoding: [0x62,0x21,0x74,0xc7,0x56,0xe7]
          vorps %zmm23, %zmm17, %zmm28 {%k7} {z}
// CHECK: vorps (%rcx), %zmm17, %zmm28
// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0x21]
          vorps (%rcx), %zmm17, %zmm28
// CHECK: vorps 4660(%rax,%r14,8), %zmm17, %zmm28
// CHECK: encoding: [0x62,0x21,0x74,0x40,0x56,0xa4,0xf0,0x34,0x12,0x00,0x00]
          vorps 4660(%rax,%r14,8), %zmm17, %zmm28
// CHECK: vorps (%rcx){1to16}, %zmm17, %zmm28
// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0x21]
          vorps (%rcx){1to16}, %zmm17, %zmm28
// CHECK: vorps 8128(%rdx), %zmm17, %zmm28
// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0x62,0x7f]
          vorps 8128(%rdx), %zmm17, %zmm28
// CHECK: vorps 8192(%rdx), %zmm17, %zmm28
// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0xa2,0x00,0x20,0x00,0x00]
          vorps 8192(%rdx), %zmm17, %zmm28
// CHECK: vorps -8192(%rdx), %zmm17, %zmm28
// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0x62,0x80]
          vorps -8192(%rdx), %zmm17, %zmm28
// CHECK: vorps -8256(%rdx), %zmm17, %zmm28
// CHECK: encoding: [0x62,0x61,0x74,0x40,0x56,0xa2,0xc0,0xdf,0xff,0xff]
          vorps -8256(%rdx), %zmm17, %zmm28
// CHECK: vorps 508(%rdx){1to16}, %zmm17, %zmm28
// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0x62,0x7f]
          vorps 508(%rdx){1to16}, %zmm17, %zmm28
// CHECK: vorps 512(%rdx){1to16}, %zmm17, %zmm28
// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0xa2,0x00,0x02,0x00,0x00]
          vorps 512(%rdx){1to16}, %zmm17, %zmm28
// CHECK: vorps -512(%rdx){1to16}, %zmm17, %zmm28
// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0x62,0x80]
          vorps -512(%rdx){1to16}, %zmm17, %zmm28
// CHECK: vorps -516(%rdx){1to16}, %zmm17, %zmm28
// CHECK: encoding: [0x62,0x61,0x74,0x50,0x56,0xa2,0xfc,0xfd,0xff,0xff]
          vorps -516(%rdx){1to16}, %zmm17, %zmm28
// Second vxorpd set, 512-bit forms: first source %zmm18, destination %zmm28.
// Cases: register source %zmm27; write-mask %k4 with and without {z};
// full-width and {1to8} memory sources; the indexed form uses displacement
// 4660. The remaining displacement cases pair the last value whose expected
// bytes use a one-byte displacement with the first that needs four
// (8128/8192, -8192/-8256; for {1to8}: 1016/1024, -1024/-1032).
// CHECK: vxorpd %zmm27, %zmm18, %zmm28
// CHECK: encoding: [0x62,0x01,0xed,0x40,0x57,0xe3]
          vxorpd %zmm27, %zmm18, %zmm28
// CHECK: vxorpd %zmm27, %zmm18, %zmm28 {%k4}
// CHECK: encoding: [0x62,0x01,0xed,0x44,0x57,0xe3]
          vxorpd %zmm27, %zmm18, %zmm28 {%k4}
// CHECK: vxorpd %zmm27, %zmm18, %zmm28 {%k4} {z}
// CHECK: encoding: [0x62,0x01,0xed,0xc4,0x57,0xe3]
          vxorpd %zmm27, %zmm18, %zmm28 {%k4} {z}
// CHECK: vxorpd (%rcx), %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x40,0x57,0x21]
          vxorpd (%rcx), %zmm18, %zmm28
// CHECK: vxorpd 4660(%rax,%r14,8), %zmm18, %zmm28
// CHECK: encoding: [0x62,0x21,0xed,0x40,0x57,0xa4,0xf0,0x34,0x12,0x00,0x00]
          vxorpd 4660(%rax,%r14,8), %zmm18, %zmm28
// CHECK: vxorpd (%rcx){1to8}, %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0x21]
          vxorpd (%rcx){1to8}, %zmm18, %zmm28
// CHECK: vxorpd 8128(%rdx), %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x40,0x57,0x62,0x7f]
          vxorpd 8128(%rdx), %zmm18, %zmm28
// CHECK: vxorpd 8192(%rdx), %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x40,0x57,0xa2,0x00,0x20,0x00,0x00]
          vxorpd 8192(%rdx), %zmm18, %zmm28
// CHECK: vxorpd -8192(%rdx), %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x40,0x57,0x62,0x80]
          vxorpd -8192(%rdx), %zmm18, %zmm28
// CHECK: vxorpd -8256(%rdx), %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x40,0x57,0xa2,0xc0,0xdf,0xff,0xff]
          vxorpd -8256(%rdx), %zmm18, %zmm28
// CHECK: vxorpd 1016(%rdx){1to8}, %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0x62,0x7f]
          vxorpd 1016(%rdx){1to8}, %zmm18, %zmm28
// CHECK: vxorpd 1024(%rdx){1to8}, %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0xa2,0x00,0x04,0x00,0x00]
          vxorpd 1024(%rdx){1to8}, %zmm18, %zmm28
// CHECK: vxorpd -1024(%rdx){1to8}, %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0x62,0x80]
          vxorpd -1024(%rdx){1to8}, %zmm18, %zmm28
// CHECK: vxorpd -1032(%rdx){1to8}, %zmm18, %zmm28
// CHECK: encoding: [0x62,0x61,0xed,0x50,0x57,0xa2,0xf8,0xfb,0xff,0xff]
          vxorpd -1032(%rdx){1to8}, %zmm18, %zmm28
// Second vxorps set, 512-bit forms: first source %zmm28, destination %zmm24.
// Cases: register source %zmm18; write-mask %k4 with and without {z};
// full-width and {1to16} memory sources; the indexed form uses displacement
// 4660. The remaining displacement cases pair the last value whose expected
// bytes use a one-byte displacement with the first that needs four
// (8128/8192, -8192/-8256; for {1to16}: 508/512, -512/-516).
// CHECK: vxorps %zmm18, %zmm28, %zmm24
// CHECK: encoding: [0x62,0x21,0x1c,0x40,0x57,0xc2]
          vxorps %zmm18, %zmm28, %zmm24
// CHECK: vxorps %zmm18, %zmm28, %zmm24 {%k4}
// CHECK: encoding: [0x62,0x21,0x1c,0x44,0x57,0xc2]
          vxorps %zmm18, %zmm28, %zmm24 {%k4}
// CHECK: vxorps %zmm18, %zmm28, %zmm24 {%k4} {z}
// CHECK: encoding: [0x62,0x21,0x1c,0xc4,0x57,0xc2]
          vxorps %zmm18, %zmm28, %zmm24 {%k4} {z}
// CHECK: vxorps (%rcx), %zmm28, %zmm24
// CHECK: encoding: [0x62,0x61,0x1c,0x40,0x57,0x01]
          vxorps (%rcx), %zmm28, %zmm24
// CHECK: vxorps 4660(%rax,%r14,8), %zmm28, %zmm24
// CHECK: encoding: [0x62,0x21,0x1c,0x40,0x57,0x84,0xf0,0x34,0x12,0x00,0x00]
          vxorps 4660(%rax,%r14,8), %zmm28, %zmm24
// CHECK: vxorps (%rcx){1to16}, %zmm28, %zmm24
// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x01]
          vxorps (%rcx){1to16}, %zmm28, %zmm24
// CHECK: vxorps 8128(%rdx), %zmm28, %zmm24
// CHECK: encoding: [0x62,0x61,0x1c,0x40,0x57,0x42,0x7f]
          vxorps 8128(%rdx), %zmm28, %zmm24
// CHECK: vxorps 8192(%rdx), %zmm28, %zmm24
// CHECK: encoding: [0x62,0x61,0x1c,0x40,0x57,0x82,0x00,0x20,0x00,0x00]
          vxorps 8192(%rdx), %zmm28, %zmm24
// CHECK: vxorps -8192(%rdx), %zmm28, %zmm24
// CHECK: encoding: [0x62,0x61,0x1c,0x40,0x57,0x42,0x80]
          vxorps -8192(%rdx), %zmm28, %zmm24
// CHECK: vxorps -8256(%rdx), %zmm28, %zmm24
// CHECK: encoding: [0x62,0x61,0x1c,0x40,0x57,0x82,0xc0,0xdf,0xff,0xff]
          vxorps -8256(%rdx), %zmm28, %zmm24
// CHECK: vxorps 508(%rdx){1to16}, %zmm28, %zmm24
// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x42,0x7f]
          vxorps 508(%rdx){1to16}, %zmm28, %zmm24
// CHECK: vxorps 512(%rdx){1to16}, %zmm28, %zmm24
// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x82,0x00,0x02,0x00,0x00]
          vxorps 512(%rdx){1to16}, %zmm28, %zmm24
// CHECK: vxorps -512(%rdx){1to16}, %zmm28, %zmm24
// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x42,0x80]
          vxorps -512(%rdx){1to16}, %zmm28, %zmm24
// CHECK: vxorps -516(%rdx){1to16}, %zmm28, %zmm24
// CHECK: encoding: [0x62,0x61,0x1c,0x50,0x57,0x82,0xfc,0xfd,0xff,0xff]
          vxorps -516(%rdx){1to16}, %zmm28, %zmm24

File diff suppressed because it is too large Load Diff

View File

@ -8955,3 +8955,227 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// CHECK: vprorq $123, -1032(%rdx){1to4}, %ymm21
// CHECK: encoding: [0x62,0xf1,0xd5,0x30,0x72,0x82,0xf8,0xfb,0xff,0xff,0x7b]
vprorq $123, -1032(%rdx){1to4}, %ymm21
// vpmuludq with xmm operands (source %xmm22, destination %xmm27).
// Each case below is three lines: a CHECK line with the instruction text,
// a CHECK line with the encoding bytes, and the instruction itself.
// Register-source cases first: plain, with {%k5}, and with {%k5} {z}.
// CHECK: vpmuludq %xmm18, %xmm22, %xmm27
// CHECK: encoding: [0x62,0x21,0xcd,0x00,0xf4,0xda]
vpmuludq %xmm18, %xmm22, %xmm27
// CHECK: vpmuludq %xmm18, %xmm22, %xmm27 {%k5}
// CHECK: encoding: [0x62,0x21,0xcd,0x05,0xf4,0xda]
vpmuludq %xmm18, %xmm22, %xmm27 {%k5}
// CHECK: vpmuludq %xmm18, %xmm22, %xmm27 {%k5} {z}
// CHECK: encoding: [0x62,0x21,0xcd,0x85,0xf4,0xda]
vpmuludq %xmm18, %xmm22, %xmm27 {%k5} {z}
// Memory-source cases: base only, base+index*8+displacement, and a {1to2}
// operand.
// CHECK: vpmuludq (%rcx), %xmm22, %xmm27
// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x19]
vpmuludq (%rcx), %xmm22, %xmm27
// CHECK: vpmuludq 291(%rax,%r14,8), %xmm22, %xmm27
// CHECK: encoding: [0x62,0x21,0xcd,0x00,0xf4,0x9c,0xf0,0x23,0x01,0x00,0x00]
vpmuludq 291(%rax,%r14,8), %xmm22, %xmm27
// CHECK: vpmuludq (%rcx){1to2}, %xmm22, %xmm27
// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x19]
vpmuludq (%rcx){1to2}, %xmm22, %xmm27
// Displacement cases: 2032 and -2048 are expected as a single displacement
// byte (0x7f / 0x80); 2048 and -2064 are expected as four bytes.
// CHECK: vpmuludq 2032(%rdx), %xmm22, %xmm27
// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x5a,0x7f]
vpmuludq 2032(%rdx), %xmm22, %xmm27
// CHECK: vpmuludq 2048(%rdx), %xmm22, %xmm27
// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x9a,0x00,0x08,0x00,0x00]
vpmuludq 2048(%rdx), %xmm22, %xmm27
// CHECK: vpmuludq -2048(%rdx), %xmm22, %xmm27
// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x5a,0x80]
vpmuludq -2048(%rdx), %xmm22, %xmm27
// CHECK: vpmuludq -2064(%rdx), %xmm22, %xmm27
// CHECK: encoding: [0x62,0x61,0xcd,0x00,0xf4,0x9a,0xf0,0xf7,0xff,0xff]
vpmuludq -2064(%rdx), %xmm22, %xmm27
// Same displacement pattern with {1to2} operands: 1016 and -1024 are
// expected as one byte, 1024 and -1032 as four bytes.
// CHECK: vpmuludq 1016(%rdx){1to2}, %xmm22, %xmm27
// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x5a,0x7f]
vpmuludq 1016(%rdx){1to2}, %xmm22, %xmm27
// CHECK: vpmuludq 1024(%rdx){1to2}, %xmm22, %xmm27
// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x9a,0x00,0x04,0x00,0x00]
vpmuludq 1024(%rdx){1to2}, %xmm22, %xmm27
// CHECK: vpmuludq -1024(%rdx){1to2}, %xmm22, %xmm27
// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x5a,0x80]
vpmuludq -1024(%rdx){1to2}, %xmm22, %xmm27
// CHECK: vpmuludq -1032(%rdx){1to2}, %xmm22, %xmm27
// CHECK: encoding: [0x62,0x61,0xcd,0x10,0xf4,0x9a,0xf8,0xfb,0xff,0xff]
vpmuludq -1032(%rdx){1to2}, %xmm22, %xmm27
// vpmuludq with ymm operands (source %ymm27, destination %ymm30).
// Each case below is three lines: a CHECK line with the instruction text,
// a CHECK line with the encoding bytes, and the instruction itself.
// Register-source cases first: plain, with {%k7}, and with {%k7} {z}.
// CHECK: vpmuludq %ymm18, %ymm27, %ymm30
// CHECK: encoding: [0x62,0x21,0xa5,0x20,0xf4,0xf2]
vpmuludq %ymm18, %ymm27, %ymm30
// CHECK: vpmuludq %ymm18, %ymm27, %ymm30 {%k7}
// CHECK: encoding: [0x62,0x21,0xa5,0x27,0xf4,0xf2]
vpmuludq %ymm18, %ymm27, %ymm30 {%k7}
// CHECK: vpmuludq %ymm18, %ymm27, %ymm30 {%k7} {z}
// CHECK: encoding: [0x62,0x21,0xa5,0xa7,0xf4,0xf2]
vpmuludq %ymm18, %ymm27, %ymm30 {%k7} {z}
// Memory-source cases: base only, base+index*8+displacement, and a {1to4}
// operand.
// CHECK: vpmuludq (%rcx), %ymm27, %ymm30
// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0x31]
vpmuludq (%rcx), %ymm27, %ymm30
// CHECK: vpmuludq 291(%rax,%r14,8), %ymm27, %ymm30
// CHECK: encoding: [0x62,0x21,0xa5,0x20,0xf4,0xb4,0xf0,0x23,0x01,0x00,0x00]
vpmuludq 291(%rax,%r14,8), %ymm27, %ymm30
// CHECK: vpmuludq (%rcx){1to4}, %ymm27, %ymm30
// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0x31]
vpmuludq (%rcx){1to4}, %ymm27, %ymm30
// Displacement cases: 4064 and -4096 are expected as a single displacement
// byte (0x7f / 0x80); 4096 and -4128 are expected as four bytes.
// CHECK: vpmuludq 4064(%rdx), %ymm27, %ymm30
// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0x72,0x7f]
vpmuludq 4064(%rdx), %ymm27, %ymm30
// CHECK: vpmuludq 4096(%rdx), %ymm27, %ymm30
// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0xb2,0x00,0x10,0x00,0x00]
vpmuludq 4096(%rdx), %ymm27, %ymm30
// CHECK: vpmuludq -4096(%rdx), %ymm27, %ymm30
// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0x72,0x80]
vpmuludq -4096(%rdx), %ymm27, %ymm30
// CHECK: vpmuludq -4128(%rdx), %ymm27, %ymm30
// CHECK: encoding: [0x62,0x61,0xa5,0x20,0xf4,0xb2,0xe0,0xef,0xff,0xff]
vpmuludq -4128(%rdx), %ymm27, %ymm30
// Same displacement pattern with {1to4} operands: 1016 and -1024 are
// expected as one byte, 1024 and -1032 as four bytes.
// CHECK: vpmuludq 1016(%rdx){1to4}, %ymm27, %ymm30
// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0x72,0x7f]
vpmuludq 1016(%rdx){1to4}, %ymm27, %ymm30
// CHECK: vpmuludq 1024(%rdx){1to4}, %ymm27, %ymm30
// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0xb2,0x00,0x04,0x00,0x00]
vpmuludq 1024(%rdx){1to4}, %ymm27, %ymm30
// CHECK: vpmuludq -1024(%rdx){1to4}, %ymm27, %ymm30
// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0x72,0x80]
vpmuludq -1024(%rdx){1to4}, %ymm27, %ymm30
// CHECK: vpmuludq -1032(%rdx){1to4}, %ymm27, %ymm30
// CHECK: encoding: [0x62,0x61,0xa5,0x30,0xf4,0xb2,0xf8,0xfb,0xff,0xff]
vpmuludq -1032(%rdx){1to4}, %ymm27, %ymm30
// vpmuldq with xmm operands (source %xmm24, destination %xmm25).
// Each case below is three lines: a CHECK line with the instruction text,
// a CHECK line with the encoding bytes, and the instruction itself.
// Register-source cases first: plain, with {%k3}, and with {%k3} {z}.
// CHECK: vpmuldq %xmm26, %xmm24, %xmm25
// CHECK: encoding: [0x62,0x02,0xbd,0x00,0x28,0xca]
vpmuldq %xmm26, %xmm24, %xmm25
// CHECK: vpmuldq %xmm26, %xmm24, %xmm25 {%k3}
// CHECK: encoding: [0x62,0x02,0xbd,0x03,0x28,0xca]
vpmuldq %xmm26, %xmm24, %xmm25 {%k3}
// CHECK: vpmuldq %xmm26, %xmm24, %xmm25 {%k3} {z}
// CHECK: encoding: [0x62,0x02,0xbd,0x83,0x28,0xca]
vpmuldq %xmm26, %xmm24, %xmm25 {%k3} {z}
// Memory-source cases: base only, base+index*8+displacement, and a {1to2}
// operand.
// CHECK: vpmuldq (%rcx), %xmm24, %xmm25
// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x09]
vpmuldq (%rcx), %xmm24, %xmm25
// CHECK: vpmuldq 291(%rax,%r14,8), %xmm24, %xmm25
// CHECK: encoding: [0x62,0x22,0xbd,0x00,0x28,0x8c,0xf0,0x23,0x01,0x00,0x00]
vpmuldq 291(%rax,%r14,8), %xmm24, %xmm25
// CHECK: vpmuldq (%rcx){1to2}, %xmm24, %xmm25
// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x09]
vpmuldq (%rcx){1to2}, %xmm24, %xmm25
// Displacement cases: 2032 and -2048 are expected as a single displacement
// byte (0x7f / 0x80); 2048 and -2064 are expected as four bytes.
// CHECK: vpmuldq 2032(%rdx), %xmm24, %xmm25
// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x4a,0x7f]
vpmuldq 2032(%rdx), %xmm24, %xmm25
// CHECK: vpmuldq 2048(%rdx), %xmm24, %xmm25
// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x8a,0x00,0x08,0x00,0x00]
vpmuldq 2048(%rdx), %xmm24, %xmm25
// CHECK: vpmuldq -2048(%rdx), %xmm24, %xmm25
// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x4a,0x80]
vpmuldq -2048(%rdx), %xmm24, %xmm25
// CHECK: vpmuldq -2064(%rdx), %xmm24, %xmm25
// CHECK: encoding: [0x62,0x62,0xbd,0x00,0x28,0x8a,0xf0,0xf7,0xff,0xff]
vpmuldq -2064(%rdx), %xmm24, %xmm25
// Same displacement pattern with {1to2} operands: 1016 and -1024 are
// expected as one byte, 1024 and -1032 as four bytes.
// CHECK: vpmuldq 1016(%rdx){1to2}, %xmm24, %xmm25
// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x4a,0x7f]
vpmuldq 1016(%rdx){1to2}, %xmm24, %xmm25
// CHECK: vpmuldq 1024(%rdx){1to2}, %xmm24, %xmm25
// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x8a,0x00,0x04,0x00,0x00]
vpmuldq 1024(%rdx){1to2}, %xmm24, %xmm25
// CHECK: vpmuldq -1024(%rdx){1to2}, %xmm24, %xmm25
// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x4a,0x80]
vpmuldq -1024(%rdx){1to2}, %xmm24, %xmm25
// CHECK: vpmuldq -1032(%rdx){1to2}, %xmm24, %xmm25
// CHECK: encoding: [0x62,0x62,0xbd,0x10,0x28,0x8a,0xf8,0xfb,0xff,0xff]
vpmuldq -1032(%rdx){1to2}, %xmm24, %xmm25
// vpmuldq with ymm operands (source %ymm18, destination %ymm19).
// Each case below is three lines: a CHECK line with the instruction text,
// a CHECK line with the encoding bytes, and the instruction itself.
// Register-source cases first: plain, with {%k6}, and with {%k6} {z}.
// CHECK: vpmuldq %ymm26, %ymm18, %ymm19
// CHECK: encoding: [0x62,0x82,0xed,0x20,0x28,0xda]
vpmuldq %ymm26, %ymm18, %ymm19
// CHECK: vpmuldq %ymm26, %ymm18, %ymm19 {%k6}
// CHECK: encoding: [0x62,0x82,0xed,0x26,0x28,0xda]
vpmuldq %ymm26, %ymm18, %ymm19 {%k6}
// CHECK: vpmuldq %ymm26, %ymm18, %ymm19 {%k6} {z}
// CHECK: encoding: [0x62,0x82,0xed,0xa6,0x28,0xda]
vpmuldq %ymm26, %ymm18, %ymm19 {%k6} {z}
// Memory-source cases: base only, base+index*8+displacement, and a {1to4}
// operand.
// CHECK: vpmuldq (%rcx), %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x19]
vpmuldq (%rcx), %ymm18, %ymm19
// CHECK: vpmuldq 291(%rax,%r14,8), %ymm18, %ymm19
// CHECK: encoding: [0x62,0xa2,0xed,0x20,0x28,0x9c,0xf0,0x23,0x01,0x00,0x00]
vpmuldq 291(%rax,%r14,8), %ymm18, %ymm19
// CHECK: vpmuldq (%rcx){1to4}, %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x19]
vpmuldq (%rcx){1to4}, %ymm18, %ymm19
// Displacement cases: 4064 and -4096 are expected as a single displacement
// byte (0x7f / 0x80); 4096 and -4128 are expected as four bytes.
// CHECK: vpmuldq 4064(%rdx), %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x5a,0x7f]
vpmuldq 4064(%rdx), %ymm18, %ymm19
// CHECK: vpmuldq 4096(%rdx), %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x9a,0x00,0x10,0x00,0x00]
vpmuldq 4096(%rdx), %ymm18, %ymm19
// CHECK: vpmuldq -4096(%rdx), %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x5a,0x80]
vpmuldq -4096(%rdx), %ymm18, %ymm19
// CHECK: vpmuldq -4128(%rdx), %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0xed,0x20,0x28,0x9a,0xe0,0xef,0xff,0xff]
vpmuldq -4128(%rdx), %ymm18, %ymm19
// Same displacement pattern with {1to4} operands: 1016 and -1024 are
// expected as one byte, 1024 and -1032 as four bytes.
// CHECK: vpmuldq 1016(%rdx){1to4}, %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x5a,0x7f]
vpmuldq 1016(%rdx){1to4}, %ymm18, %ymm19
// CHECK: vpmuldq 1024(%rdx){1to4}, %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x9a,0x00,0x04,0x00,0x00]
vpmuldq 1024(%rdx){1to4}, %ymm18, %ymm19
// CHECK: vpmuldq -1024(%rdx){1to4}, %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x5a,0x80]
vpmuldq -1024(%rdx){1to4}, %ymm18, %ymm19
// CHECK: vpmuldq -1032(%rdx){1to4}, %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0xed,0x30,0x28,0x9a,0xf8,0xfb,0xff,0xff]
vpmuldq -1032(%rdx){1to4}, %ymm18, %ymm19