mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-03 02:31:26 +00:00
AVX-512: Implemented missing encoding and intrinsics for FMA instructions
Added tests for DAG lowering, encoding, and intrinsics. Differential Revision: http://reviews.llvm.org/D10796 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240926 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
85c698064c
commit
048a1eb977
@ -2450,36 +2450,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_fma_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
@ -2504,36 +2475,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmsubps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmsubpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
@ -2558,36 +2499,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmaddps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmaddps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmaddpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmaddpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
@ -2612,36 +2523,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
@ -2660,36 +2541,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmaddsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmaddsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmaddsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmaddsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmaddsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmaddsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
@ -2708,36 +2559,403 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsubadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsubadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubaddps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsubadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmsubaddps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsubadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsubadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsubadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfmadd_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmadd_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask3">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vfmadd_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddpd128_maskz">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfmadd_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmadd_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask3">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vfmadd_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddpd256_maskz">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfmadd_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmadd_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask3">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vfmadd_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddpd512_maskz">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfmadd_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmadd_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddps128_mask3">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vfmadd_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddps128_maskz">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfmadd_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmadd_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddps256_mask3">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vfmadd_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddps256_maskz">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfmadd_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmadd_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddps512_mask3">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vfmadd_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddps512_maskz">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfmaddsub_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmaddsub_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask3">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vfmaddsub_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_maskz">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfmaddsub_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmaddsub_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask3">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vfmaddsub_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_maskz">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfmaddsub_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmaddsub_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask3">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vfmaddsub_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_maskz">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfmaddsub_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmaddsub_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask3">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vfmaddsub_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubps128_maskz">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfmaddsub_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmaddsub_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask3">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vfmaddsub_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubps256_maskz">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfmaddsub_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmaddsub_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask3">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_maskz_vfmaddsub_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddsubps512_maskz">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmsub_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmsubpd128_mask3">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmsub_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmsubpd256_mask3">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmsub_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmsubpd512_mask3">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmsub_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmsubps128_mask3">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmsub_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmsubps256_mask3">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmsub_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmsubps512_mask3">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmsubadd_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmsubaddpd128_mask3">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmsubadd_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmsubaddpd256_mask3">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmsubadd_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmsubaddpd512_mask3">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmsubadd_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmsubaddps128_mask3">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmsubadd_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmsubaddps256_mask3">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfmsubadd_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmsubaddps512_mask3">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfnmadd_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmaddpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfnmadd_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmaddpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfnmadd_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmaddpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfnmadd_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmaddps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfnmadd_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmaddps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfnmadd_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmaddps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfnmsub_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfnmsub_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask3">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfnmsub_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfnmsub_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask3">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfnmsub_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfnmsub_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask3">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfnmsub_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfnmsub_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask3">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfnmsub_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfnmsub_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask3">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfnmsub_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask3_vfnmsub_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask3">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -15126,7 +15126,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
|
||||
Mask, PassThru, Subtarget, DAG);
|
||||
}
|
||||
case VPERM_3OP_MASKZ:
|
||||
case VPERM_3OP_MASK:
|
||||
case VPERM_3OP_MASK:
|
||||
case FMA_OP_MASK3:
|
||||
case FMA_OP_MASKZ:
|
||||
case FMA_OP_MASK: {
|
||||
SDValue Src1 = Op.getOperand(1);
|
||||
@ -15134,9 +15135,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
|
||||
SDValue Src3 = Op.getOperand(3);
|
||||
SDValue Mask = Op.getOperand(4);
|
||||
EVT VT = Op.getValueType();
|
||||
SDValue PassThru =
|
||||
(IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ) ?
|
||||
getZeroVector(VT, Subtarget, DAG, dl) : Src1;
|
||||
SDValue PassThru = SDValue();
|
||||
|
||||
// set PassThru element
|
||||
if (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ)
|
||||
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
|
||||
else if (IntrData->Type == FMA_OP_MASK3)
|
||||
PassThru = Src3;
|
||||
else
|
||||
PassThru = Src1;
|
||||
|
||||
// We specify 2 possible opcodes for intrinsics with rounding modes.
|
||||
// First, we check if the intrinsic may have non-default rounding mode,
|
||||
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
|
||||
|
@ -4005,147 +4005,203 @@ let Predicates = [HasAVX512] in {
|
||||
//
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
|
||||
multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
SDPatternOperator OpNode = null_frag> {
|
||||
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86VectorVTInfo _> {
|
||||
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.RC:$src3),
|
||||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
|
||||
AVX512FMA3Base;
|
||||
|
||||
let mayLoad = 1 in
|
||||
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
let mayLoad = 1 in {
|
||||
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.MemOp:$src3),
|
||||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
|
||||
AVX512FMA3Base;
|
||||
|
||||
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.ScalarMemOp:$src3),
|
||||
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
|
||||
!strconcat("$src2, ${src3}", _.BroadcastStr ),
|
||||
(OpNode _.RC:$src1,
|
||||
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
|
||||
AVX512FMA3Base, EVEX_B;
|
||||
}
|
||||
} // Constraints = "$src1 = $dst"
|
||||
}
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
|
||||
multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr,
|
||||
X86VectorVTInfo _,
|
||||
SDPatternOperator OpNode> {
|
||||
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86VectorVTInfo _> {
|
||||
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
|
||||
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
|
||||
(_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
|
||||
AVX512FMA3Base, EVEX_B, EVEX_RC;
|
||||
}
|
||||
}
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
multiclass avx512_fma3_round_forms<bits<8> opc213, string OpcodeStr,
|
||||
X86VectorVTInfo VTI, SDPatternOperator OpNode> {
|
||||
defm v213r : avx512_fma3_round_rrb<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
|
||||
VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
|
||||
}
|
||||
|
||||
multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
|
||||
string OpcodeStr, X86VectorVTInfo VTI,
|
||||
SDPatternOperator OpNode> {
|
||||
defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
|
||||
VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
|
||||
defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
|
||||
VTI>, EVEX_CD8<VTI.EltSize, CD8VF>;
|
||||
}
|
||||
|
||||
multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231,
|
||||
string OpcodeStr,
|
||||
SDPatternOperator OpNode,
|
||||
SDPatternOperator OpNodeRnd> {
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
defm NAME##PSZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
|
||||
v16f32_info, OpNode>,
|
||||
avx512_fma3_round_forms<opc213, OpcodeStr,
|
||||
v16f32_info, OpNodeRnd>, EVEX_V512;
|
||||
defm NAME##PSZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
|
||||
v8f32x_info, OpNode>, EVEX_V256;
|
||||
defm NAME##PSZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
|
||||
v4f32x_info, OpNode>, EVEX_V128;
|
||||
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
|
||||
let Predicates = [HasAVX512] in {
|
||||
defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512>,
|
||||
avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
|
||||
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
|
||||
}
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
|
||||
v8f64_info, OpNode>,
|
||||
avx512_fma3_round_forms<opc213, OpcodeStr, v8f64_info,
|
||||
OpNodeRnd>, EVEX_V512, VEX_W;
|
||||
defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
|
||||
v4f64x_info, OpNode>,
|
||||
EVEX_V256, VEX_W;
|
||||
defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
|
||||
v2f64x_info, OpNode>,
|
||||
EVEX_V128, VEX_W;
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256>,
|
||||
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
|
||||
defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128>,
|
||||
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
|
||||
}
|
||||
}
|
||||
|
||||
defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>;
|
||||
defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>;
|
||||
defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub, X86FmaddsubRnd>;
|
||||
defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>;
|
||||
defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
|
||||
defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
|
||||
// Instantiates the "ps" and "pd" families of one 213-form packed FMA opcode.
//   opc       - opcode byte, forwarded unchanged to both families.
//   OpcodeStr - mnemonic stem; "ps" or "pd" is appended per family.
//   OpNode    - pattern node forwarded to avx512_fma3p_213_common.
//   OpNodeRnd - second pattern node forwarded to avx512_fma3p_213_common.
// Both families delegate to avx512_fma3p_213_common; only PD adds VEX_W.
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
SDNode OpNodeRnd > {
|
||||
// Single-precision family, described by avx512vl_f32_info.
defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
|
||||
avx512vl_f32_info>;
|
||||
// Double-precision family, described by avx512vl_f64_info, with VEX_W.
defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
|
||||
avx512vl_f64_info>, VEX_W;
|
||||
}
|
||||
|
||||
// 213-form packed FMA families. Each line supplies an opcode byte, a mnemonic
// stem, the X86 pattern node and the matching *Rnd node to avx512_fma3p_213_f.
defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
|
||||
defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
|
||||
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
|
||||
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
|
||||
defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
|
||||
defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
|
||||
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86VectorVTInfo _> {
|
||||
let mayLoad = 1 in
|
||||
def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src3, _.MemOp:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src3, $dst|$dst, $src3, $src2}"),
|
||||
[(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
|
||||
_.RC:$src3)))]>;
|
||||
def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src3, _.ScalarMemOp:$src2),
|
||||
!strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr,
|
||||
", $src3, $dst|$dst, $src3, ${src2}", _.BroadcastStr, "}"),
|
||||
[(set _.RC:$dst,
|
||||
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
|
||||
(_.ScalarLdFrag addr:$src2))),
|
||||
_.RC:$src3))]>, EVEX_B;
|
||||
// Defines the three non-rounding 231-form variants for one vector type `_`:
//   r  - register/register,
//   m  - full-vector memory operand,
//   mb - scalar memory operand broadcast to a vector (EVEX_B).
// Every pattern calls OpNode as ($src2, $src3, $src1), i.e. $src1 is the last
// operand.
// NOTE(review): $src1 does not appear in any `ins` list here; presumably
// AVX512_maskable_3src supplies it as the tied source - confirm there.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86VectorVTInfo _> {
|
||||
// Register form: both explicit sources are registers of class _.RC.
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.RC:$src3),
|
||||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1))>,
|
||||
AVX512FMA3Base;
|
||||
|
||||
// Both memory forms below are marked mayLoad.
let mayLoad = 1 in {
|
||||
// Memory form: $src3 is loaded through _.LdFrag.
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.MemOp:$src3),
|
||||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
|
||||
AVX512FMA3Base;
|
||||
|
||||
// Broadcast form: a scalar load of $src3 is wrapped in X86VBroadcast, and
// _.BroadcastStr is appended to the memory operand in both asm strings.
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.ScalarMemOp:$src3),
|
||||
OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
|
||||
"$src2, ${src3}"##_.BroadcastStr,
|
||||
(_.VT (OpNode _.RC:$src2,
|
||||
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
|
||||
_.RC:$src1))>, AVX512FMA3Base, EVEX_B;
|
||||
}
|
||||
}
|
||||
|
||||
// Defines the 231-form register variant that takes an explicit rounding-control
// operand. $rc is declared as AVX512RC, printed alongside the two register
// sources, and passed to OpNode as a fourth (i32 imm) operand after
// ($src2, $src3, $src1). The record is tagged EVEX_B and EVEX_RC.
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86VectorVTInfo _> {
|
||||
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
|
||||
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
|
||||
(_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc)))>,
|
||||
AVX512FMA3Base, EVEX_B, EVEX_RC;
|
||||
}
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
multiclass avx512_fma3p_m132_f<bits<8> opc, string OpcodeStr, SDNode OpNode> {
|
||||
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
defm NAME##PSZ : avx512_fma3p_m132<opc, OpcodeStr##ps,
|
||||
OpNode,v16f32_info>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm NAME##PSZ256 : avx512_fma3p_m132<opc, OpcodeStr##ps,
|
||||
OpNode, v8f32x_info>, EVEX_V256,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm NAME##PSZ128 : avx512_fma3p_m132<opc, OpcodeStr##ps,
|
||||
OpNode, v4f32x_info>, EVEX_V128,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
|
||||
let Predicates = [HasAVX512] in {
|
||||
defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512>,
|
||||
avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
|
||||
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
|
||||
}
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm NAME##PDZ : avx512_fma3p_m132<opc, OpcodeStr##pd,
|
||||
OpNode, v8f64_info>, EVEX_V512,
|
||||
VEX_W, EVEX_CD8<32, CD8VF>;
|
||||
defm NAME##PDZ256 : avx512_fma3p_m132<opc, OpcodeStr##pd,
|
||||
OpNode, v4f64x_info>, EVEX_V256,
|
||||
VEX_W, EVEX_CD8<32, CD8VF>;
|
||||
defm NAME##PDZ128 : avx512_fma3p_m132<opc, OpcodeStr##pd,
|
||||
OpNode, v2f64x_info>, EVEX_V128,
|
||||
VEX_W, EVEX_CD8<32, CD8VF>;
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256>,
|
||||
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
|
||||
defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128>,
|
||||
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
|
||||
}
|
||||
}
|
||||
|
||||
defm VFMADD132 : avx512_fma3p_m132_f<0x98, "vfmadd132", X86Fmadd>;
|
||||
defm VFMSUB132 : avx512_fma3p_m132_f<0x9A, "vfmsub132", X86Fmsub>;
|
||||
defm VFMADDSUB132 : avx512_fma3p_m132_f<0x96, "vfmaddsub132", X86Fmaddsub>;
|
||||
defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>;
|
||||
defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>;
|
||||
defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>;
|
||||
// Instantiates the "ps" and "pd" families of one 231-form packed FMA opcode.
//   opc       - opcode byte, forwarded unchanged to both families.
//   OpcodeStr - mnemonic stem; "ps" or "pd" is appended per family.
//   OpNode    - pattern node forwarded to avx512_fma3p_231_common.
//   OpNodeRnd - second pattern node forwarded to avx512_fma3p_231_common.
// Both families delegate to avx512_fma3p_231_common; only PD adds VEX_W.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
SDNode OpNodeRnd > {
|
||||
// Single-precision family, described by avx512vl_f32_info.
defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
|
||||
avx512vl_f32_info>;
|
||||
// Double-precision family, described by avx512vl_f64_info, with VEX_W.
defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
|
||||
avx512vl_f64_info>, VEX_W;
|
||||
}
|
||||
|
||||
// 231-form packed FMA families. Each line supplies an opcode byte, a mnemonic
// stem, the X86 pattern node and the matching *Rnd node to avx512_fma3p_231_f.
defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
|
||||
defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
|
||||
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
|
||||
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
|
||||
defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
|
||||
defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
// Defines the three non-rounding 132-form variants for one vector type `_`:
//   r  - register/register,
//   m  - full-vector memory operand,
//   mb - scalar memory operand broadcast to a vector (EVEX_B).
// The `ins` lists name the register operand $src3 and the register-or-memory
// operand $src2. Every pattern calls OpNode as ($src1, $src2, $src3).
// NOTE(review): $src1 does not appear in any `ins` list here; presumably
// AVX512_maskable_3src supplies it as the tied source - confirm there.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86VectorVTInfo _> {
|
||||
// Register form: both explicit sources are registers of class _.RC.
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src3, _.RC:$src2),
|
||||
OpcodeStr, "$src2, $src3", "$src3, $src2",
|
||||
(_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
|
||||
AVX512FMA3Base;
|
||||
|
||||
// Both memory forms below are marked mayLoad.
let mayLoad = 1 in {
|
||||
// Memory form: $src2 is loaded through _.LdFrag.
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src3, _.MemOp:$src2),
|
||||
OpcodeStr, "$src2, $src3", "$src3, $src2",
|
||||
(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2), _.RC:$src3))>,
|
||||
AVX512FMA3Base;
|
||||
|
||||
// Broadcast form: a scalar load of $src2 is wrapped in X86VBroadcast, and
// _.BroadcastStr is appended to the memory operand in both asm strings.
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src3, _.ScalarMemOp:$src2),
|
||||
OpcodeStr, "${src2}"##_.BroadcastStr##", $src3",
|
||||
"$src3, ${src2}"##_.BroadcastStr,
|
||||
(_.VT (OpNode _.RC:$src1,
|
||||
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
|
||||
_.RC:$src3))>, AVX512FMA3Base, EVEX_B;
|
||||
}
|
||||
}
|
||||
|
||||
// Defines the 132-form register variant that takes an explicit rounding-control
// operand. $rc is declared as AVX512RC, printed alongside the two register
// sources, and passed to OpNode as a fourth (i32 imm) operand after
// ($src1, $src2, $src3). The record is tagged EVEX_B and EVEX_RC.
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86VectorVTInfo _> {
|
||||
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src3, _.RC:$src2, AVX512RC:$rc),
|
||||
OpcodeStr, "$rc, $src2, $src3", "$src3, $src2, $rc",
|
||||
(_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
|
||||
AVX512FMA3Base, EVEX_B, EVEX_RC;
|
||||
}
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
// Instantiates the 132-form records for all three vector widths described by
// the AVX512VLVectorVTInfo `_`:
//   Z    - info512, under [HasAVX512]; combines avx512_fma3p_132_rm with
//          avx512_fma3_132_round (the only width that gets the rounding form).
//   Z256 - info256, under [HasVLX, HasAVX512]; avx512_fma3p_132_rm only.
//   Z128 - info128, under [HasVLX, HasAVX512]; avx512_fma3p_132_rm only.
// Each width takes its EVEX_CD8 element size from its own info record.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
|
||||
let Predicates = [HasAVX512] in {
|
||||
// OpNode drives the r/m/mb forms; OpNodeRnd drives the rounding form.
defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512>,
|
||||
avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
|
||||
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
|
||||
}
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256>,
|
||||
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
|
||||
defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128>,
|
||||
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
|
||||
}
|
||||
}
|
||||
|
||||
// Instantiates the "ps" and "pd" families of one 132-form packed FMA opcode.
//   opc       - opcode byte, forwarded unchanged to both families.
//   OpcodeStr - mnemonic stem; "ps" or "pd" is appended per family.
//   OpNode    - pattern node forwarded to avx512_fma3p_132_common.
//   OpNodeRnd - second pattern node forwarded to avx512_fma3p_132_common.
// Both families delegate to avx512_fma3p_132_common; only PD adds VEX_W.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
SDNode OpNodeRnd > {
|
||||
// Single-precision family, described by avx512vl_f32_info.
defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
|
||||
avx512vl_f32_info>;
|
||||
// Double-precision family, described by avx512vl_f64_info, with VEX_W.
defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
|
||||
avx512vl_f64_info>, VEX_W;
|
||||
}
|
||||
|
||||
// 132-form packed FMA families. Each line supplies an opcode byte, a mnemonic
// stem, the X86 pattern node and the matching *Rnd node to avx512_fma3p_132_f.
defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
|
||||
defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
|
||||
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
|
||||
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
|
||||
defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
|
||||
defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
|
||||
|
||||
// Scalar FMA
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
|
@ -22,7 +22,7 @@ enum IntrinsicType {
|
||||
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
|
||||
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
|
||||
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
|
||||
INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, VPERM_3OP_MASK,
|
||||
INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
|
||||
VPERM_3OP_MASKZ,
|
||||
INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
|
||||
EXPAND_FROM_MEM, BLEND
|
||||
@ -268,6 +268,52 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
|
||||
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, X86ISD::FMADD,
|
||||
X86ISD::FMADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_128, FMA_OP_MASK3, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_256, FMA_OP_MASK3, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_512, FMA_OP_MASK3, X86ISD::FMADD,
|
||||
X86ISD::FMADD_RND),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
|
||||
X86ISD::FMADDSUB_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
|
||||
X86ISD::FMADDSUB_RND),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_128, FMA_OP_MASK3, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_512, FMA_OP_MASK3, X86ISD::FMSUB,
|
||||
X86ISD::FMSUB_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_128, FMA_OP_MASK3, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_512, FMA_OP_MASK3, X86ISD::FMSUB,
|
||||
X86ISD::FMSUB_RND),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_512, FMA_OP_MASK3, X86ISD::FMSUBADD,
|
||||
X86ISD::FMSUBADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_512, FMA_OP_MASK3, X86ISD::FMSUBADD,
|
||||
X86ISD::FMSUBADD_RND),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_512, FMA_OP_MASK3, X86ISD::FNMSUB,
|
||||
X86ISD::FNMSUB_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_512, FMA_OP_MASK3, X86ISD::FNMSUB,
|
||||
X86ISD::FNMSUB_RND),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_add_pd_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
|
||||
@ -714,6 +760,44 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
|
||||
X86ISD::FMADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD,
|
||||
X86ISD::FMADD_RND),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
|
||||
X86ISD::FMADDSUB_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
|
||||
X86ISD::FMADDSUB_RND),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD,
|
||||
X86ISD::FNMADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD,
|
||||
X86ISD::FNMADD_RND),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB,
|
||||
X86ISD::FNMSUB_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
|
||||
X86ISD::FNMSUB_RND),
|
||||
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMIV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
|
||||
@ -802,6 +886,25 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, X86ISD::FMADD,
|
||||
X86ISD::FMADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_128, FMA_OP_MASKZ, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_256, FMA_OP_MASKZ, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_512, FMA_OP_MASKZ, X86ISD::FMADD,
|
||||
X86ISD::FMADD_RND),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
|
||||
X86ISD::FMADDSUB_RND),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
|
||||
X86ISD::FMADDSUB_RND),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ,
|
||||
@ -855,54 +958,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
|
||||
X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
|
||||
X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
|
||||
X86ISD::FMADD_RND),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD,
|
||||
X86ISD::FMADD_RND),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
|
||||
X86ISD::FMADDSUB_RND),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
|
||||
X86ISD::FMADDSUB_RND),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_512, FMA_OP_MASK, X86ISD::FMSUB,
|
||||
X86ISD::FMSUB_RND),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_512, FMA_OP_MASK, X86ISD::FMSUB,
|
||||
X86ISD::FMSUB_RND),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_512, FMA_OP_MASK, X86ISD::FMSUBADD,
|
||||
X86ISD::FMSUBADD_RND),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_512, FMA_OP_MASK, X86ISD::FMSUBADD,
|
||||
X86ISD::FMSUBADD_RND),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD,
|
||||
X86ISD::FNMADD_RND),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD,
|
||||
X86ISD::FNMADD_RND),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB,
|
||||
X86ISD::FNMSUB_RND),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
|
||||
X86ISD::FNMSUB_RND),
|
||||
X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, X86ISD::FMADD, 0),
|
||||
|
@ -1,422 +1,675 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s
|
||||
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
|
||||
define <8 x double> @test_x86_vfmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmsubpd_z
|
||||
; CHECK: vfmsub213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_mask_vfmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfmsub_pd
|
||||
; CHECK: vfmsub213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
|
||||
define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmadd_ps_z
|
||||
; CHECK: vfnmadd213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfnmadd_ps
|
||||
; CHECK: vfnmadd213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmadd_pd_z
|
||||
; CHECK: vfnmadd213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfnmadd_pd
|
||||
; CHECK: vfnmadd213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmsubps_z
|
||||
; CHECK: vfnmsub213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfnmsub_ps
|
||||
; CHECK: vfnmsub213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmsubpd_z
|
||||
; CHECK: vfnmsub213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfnmsub_pd
|
||||
; CHECK: vfnmsub213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmaddsubps_z
|
||||
; CHECK: vfmaddsub213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_fmaddsub_ps:
|
||||
; CHECK: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa6,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmaddsubpd_z
|
||||
; CHECK: vfmaddsub213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfmaddsub_pd
|
||||
; CHECK: vfmaddsub213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vfmsubaddps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmsubaddps_z
|
||||
; CHECK: vfmsubadd213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_mask_vfmsubadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfmsubadd_ps
|
||||
; CHECK: vfmsubadd213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vmovaps %zmm0, %zmm3
|
||||
; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1}
|
||||
; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
|
||||
%res2 = fadd <8 x double> %res, %res1
|
||||
ret <8 x double> %res2
|
||||
}
|
||||
|
||||
define <8 x double> @test_x86_vfmsubaddpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmsubaddpd_z
|
||||
; CHECK: vfmsubadd213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
|
||||
define <8 x double> @test_mask_vfmsubadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfmsubadd_pd
|
||||
; CHECK: vfmsubadd213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm3
|
||||
; CHECK-NEXT: vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1}
|
||||
; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
|
||||
%res2 = fadd <8 x double> %res, %res1
|
||||
ret <8 x double> %res2
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
|
||||
define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vmovaps %zmm0, %zmm3
|
||||
; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
|
||||
; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
|
||||
%res2 = fadd <8 x double> %res, %res1
|
||||
ret <8 x double> %res2
|
||||
}
|
||||
|
||||
define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmovaps %zmm0, %zmm3
|
||||
; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1}
|
||||
; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
|
||||
%res1 = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
|
||||
%res2 = fadd <16 x float> %res, %res1
|
||||
ret <16 x float> %res2
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
|
||||
define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm3
|
||||
; CHECK-NEXT: vfmaddsub231ps %zmm1, %zmm0, %zmm3 {%k1}
|
||||
; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
|
||||
%res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
|
||||
%res2 = fadd <16 x float> %res, %res1
|
||||
ret <16 x float> %res2
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
|
||||
define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmovaps %zmm0, %zmm3
|
||||
; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
|
||||
; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
|
||||
%res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
|
||||
%res2 = fadd <16 x float> %res, %res1
|
||||
ret <16 x float> %res2
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
|
||||
define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm3
|
||||
; CHECK-NEXT: vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1}
|
||||
; CHECK-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
|
||||
%res2 = fadd <8 x double> %res, %res1
|
||||
ret <8 x double> %res2
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
|
||||
define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm3
|
||||
; CHECK-NEXT: vfmsubadd231ps %zmm1, %zmm0, %zmm3 {%k1}
|
||||
; CHECK-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
|
||||
%res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
|
||||
%res2 = fadd <16 x float> %res, %res1
|
||||
ret <16 x float> %res2
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne
|
||||
; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn
|
||||
; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp
|
||||
; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz
|
||||
; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current
|
||||
; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa8,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne
|
||||
; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn
|
||||
; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp
|
||||
; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz
|
||||
; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current
|
||||
; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rne
|
||||
; CHECK: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xaa,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
|
||||
ret <16 x float> %res
|
||||
declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
|
||||
define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm3
|
||||
; CHECK-NEXT: vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
|
||||
; CHECK-NEXT: vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
|
||||
%res2 = fadd <8 x double> %res, %res1
|
||||
ret <8 x double> %res2
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtn
|
||||
; CHECK: vfmsub213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xaa,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
|
||||
define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtp
|
||||
; CHECK: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xaa,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtz
|
||||
; CHECK: vfmsub213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xaa,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmsub512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_current
|
||||
; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xaa,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rne
|
||||
; CHECK: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xaa,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtn
|
||||
; CHECK: vfmsub213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xaa,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtp
|
||||
; CHECK: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xaa,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtz
|
||||
; CHECK: vfmsub213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xaa,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_current
|
||||
; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xaa,0xc2]
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm3
|
||||
; CHECK-NEXT: vfmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
|
||||
; CHECK-NEXT: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
|
||||
%res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
|
||||
%res2 = fadd <16 x float> %res, %res1
|
||||
ret <16 x float> %res2
|
||||
}
|
||||
|
||||
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne
|
||||
; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2]
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn
|
||||
; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2]
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp
|
||||
; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2]
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz
|
||||
; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2]
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current
|
||||
; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2]
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne
|
||||
; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn
|
||||
; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp
|
||||
; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz
|
||||
; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current
|
||||
; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vmovaps %zmm0, %zmm3
|
||||
; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
|
||||
; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
|
||||
%res2 = fadd <8 x double> %res, %res1
|
||||
ret <8 x double> %res2
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
|
||||
define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vmovaps %zmm2, %zmm3
|
||||
; CHECK-NEXT: vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1}
|
||||
; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
|
||||
%res2 = fadd <8 x double> %res, %res1
|
||||
ret <8 x double> %res2
|
||||
}
|
||||
|
||||
; Zero-masked (maskz) variant of the 512-bit packed-double vfmadd intrinsic.
; The masked call is expected to carry both {%k1} and {z}; the all-ones-mask call
; with rounding operand 0 is expected to print {rn-sae}. The results are summed.
declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}
|
||||
|
||||
; Merge-masked 512-bit packed-single vfmadd intrinsic.
; The i16 mask is expected to be moved straight from %edi into %k1. One call uses
; mask %x3 with rounding operand 4, the other an all-ones mask with rounding
; operand 0 (expected to print {rn-sae}). The results are summed.
define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}
|
||||
|
||||
; mask3 variant of the 512-bit packed-single vfmadd intrinsic.
; The masked call is expected to select the 231 form accumulating into a copy of
; %zmm2; the all-ones-mask call with rounding operand 0 is expected to print
; {rn-sae}. The two results are summed.
declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}
|
||||
|
||||
; Zero-masked (maskz) variant of the 512-bit packed-single vfmadd intrinsic.
; The masked call is expected to carry both {%k1} and {z}; the all-ones-mask call
; with rounding operand 0 is expected to print {rn-sae}. The results are summed.
declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}
|
||||
|
||||
|
||||
; Masked vfnmsub, rounding operand 0: expected to print {rn-sae} with {%k1}.
; Only the llvm.x86.avx512.mask.* call is kept; a second definition of %res via
; the older llvm.x86.fma.mask.* name would be an SSA redefinition.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne
; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}
|
||||
|
||||
; Masked vfnmsub, rounding operand 1: expected to print {rd-sae} with {%k1}.
; Only the llvm.x86.avx512.mask.* call is kept; a second definition of %res via
; the older llvm.x86.fma.mask.* name would be an SSA redefinition.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn
; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}
|
||||
|
||||
; Masked vfnmsub, rounding operand 2: expected to print {ru-sae} with {%k1}.
; Only the llvm.x86.avx512.mask.* call is kept; a second definition of %res via
; the older llvm.x86.fma.mask.* name would be an SSA redefinition.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp
; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}
|
||||
|
||||
; Masked vfnmsub, rounding operand 3: expected to print {rz-sae} with {%k1}.
; Only the llvm.x86.avx512.mask.* call is kept; a second definition of %res via
; the older llvm.x86.fma.mask.* name would be an SSA redefinition.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz
; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}
|
||||
|
||||
; Masked vfnmsub, rounding operand 4: expected to print with {%k1} and no
; embedded-rounding annotation.
; Only the llvm.x86.avx512.mask.* call is kept; a second definition of %res via
; the older llvm.x86.fma.mask.* name would be an SSA redefinition.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current
; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}
|
||||
|
||||
; All-ones-mask vfnmsub, rounding operand 0: expected to print {rn-sae} with no
; mask register.
; Only the llvm.x86.avx512.mask.* call is kept; a second definition of %res via
; the older llvm.x86.fma.mask.* name would be an SSA redefinition.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne
; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}
|
||||
|
||||
; All-ones-mask vfnmsub, rounding operand 1: expected to print {rd-sae} with no
; mask register.
; Only the llvm.x86.avx512.mask.* call is kept; a second definition of %res via
; the older llvm.x86.fma.mask.* name would be an SSA redefinition.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn
; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}
|
||||
|
||||
; All-ones-mask vfnmsub, rounding operand 2: expected to print {ru-sae} with no
; mask register.
; Only the llvm.x86.avx512.mask.* call is kept; a second definition of %res via
; the older llvm.x86.fma.mask.* name would be an SSA redefinition.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp
; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}
|
||||
|
||||
; All-ones-mask vfnmsub, rounding operand 3: expected to print {rz-sae} with no
; mask register.
; Only the llvm.x86.avx512.mask.* call is kept; a second definition of %res via
; the older llvm.x86.fma.mask.* name would be an SSA redefinition.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz
; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}
|
||||
|
||||
; All-ones-mask vfnmsub, rounding operand 4: expected to print with neither a mask
; register nor an embedded-rounding annotation.
; Only the llvm.x86.avx512.mask.* call is kept; a second definition of %res via
; the older llvm.x86.fma.mask.* name would be an SSA redefinition.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current
; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
|
||||
|
||||
; Merge-masked 512-bit packed-double vfnmsub intrinsic.
; One call uses mask %x3 with rounding operand 4; the other uses an all-ones mask
; with rounding operand 0 (expected to print {rn-sae}). The results are summed.
define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}
|
||||
|
||||
; mask3 variant of the 512-bit packed-double vfnmsub intrinsic.
; The masked call is expected to select the 231 form accumulating into a copy of
; %zmm2; the all-ones-mask call with rounding operand 0 is expected to print
; {rn-sae}. The two results are summed.
declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}
|
||||
|
||||
; Merge-masked 512-bit packed-single vfnmsub intrinsic.
; One call uses mask %x3 with rounding operand 4; the other uses an all-ones mask
; with rounding operand 0 (expected to print {rn-sae}). The results are summed.
define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}
|
||||
|
||||
; mask3 variant of the 512-bit packed-single vfnmsub intrinsic.
; The masked call is expected to select the 231 form accumulating into a copy of
; %zmm2; the all-ones-mask call with rounding operand 0 is expected to print
; {rn-sae}. The two results are summed.
declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}
|
||||
|
||||
; Merge-masked 512-bit packed-double vfnmadd intrinsic.
; One call uses mask %x3 with rounding operand 4; the other uses an all-ones mask
; with rounding operand 0 (expected to print {rn-sae}). The results are summed.
define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}
|
||||
|
||||
; Merge-masked 512-bit packed-single vfnmadd intrinsic.
; One call uses mask %x3 with rounding operand 4; the other uses an all-ones mask
; with rounding operand 0 (expected to print {rn-sae}). The results are summed.
define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=SKX
|
||||
|
||||
; CHECK-LABEL: test_x86_fmadd_ps_z
|
||||
; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0
|
||||
@ -64,20 +65,94 @@ define double @test_x86_fmsub_sd_z(double %a0, double %a1, double %a2) {
|
||||
ret double %res
|
||||
}
|
||||
|
||||
; Contraction test: (a1 * splat-constant) + a2 is expected to become a single
; vfmadd231ps whose multiplicand is a {1to16} broadcast load from the constant
; pool, followed by a move of the accumulator into the return register.
; The expectations naming test132_br / vfmadd132ps and the duplicate function
; header were dropped: they do not match the body below and two consecutive
; `define` headers do not parse.
define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; CHECK-LABEL: test231_br:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmadd231ps {{.*}}(%rip){1to16}, %zmm0, %zmm1
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  %b2 = fadd <16 x float> %b1, %a2
  ret <16 x float> %b2
}
|
||||
|
||||
; Contraction test: (a1 * a2) + splat-constant is expected to become a single
; vfmadd213ps whose addend is a {1to16} broadcast load from the constant pool.
; The older, looser expectations above the function were dropped: a second label
; directive for the same function cannot match once the first has consumed it.
define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; CHECK-LABEL: test213_br:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
; CHECK-NEXT: retq
  %b1 = fmul <16 x float> %a1, %a2
  %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b2
}
|
||||
|
||||
;mask (a*c+b , a)
|
||||
; select(mask, a0 * load + a1, a0): the passthrough is the first multiplicand, so
; the expected output is one masked vfmadd132ps with a folded memory operand.
; The default-prefix run is expected to build %k1 via vpmovsxbd/vpandd/vptestmd;
; the SKX-prefix run is expected to use vpmovb2m instead.
define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; CHECK-LABEL: test_x86_fmadd132_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
;
; SKX-LABEL: test_x86_fmadd132_ps:
; SKX: ## BB#0:
; SKX-NEXT: vpmovb2m %xmm2, %k1
; SKX-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
  %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
  %x = fmul <16 x float> %a0, %a2
  %y = fadd <16 x float> %x, %a1
  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
  ret <16 x float> %res
}
|
||||
|
||||
;mask (a*c+b , b)
|
||||
; select(mask, a0 * load + a1, a1): the passthrough is the addend, so the expected
; output is one masked vfmadd231ps accumulating into %zmm1, then a move of %zmm1
; into the return register.
; The default-prefix run is expected to build %k1 via vpmovsxbd/vpandd/vptestmd;
; the SKX-prefix run is expected to use vpmovb2m instead.
define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; CHECK-LABEL: test_x86_fmadd231_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
;
; SKX-LABEL: test_x86_fmadd231_ps:
; SKX: ## BB#0:
; SKX-NEXT: vpmovb2m %xmm2, %k1
; SKX-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
  %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
  %x = fmul <16 x float> %a0, %a2
  %y = fadd <16 x float> %x, %a1
  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
  ret <16 x float> %res
}
|
||||
|
||||
;mask (b*a+c , b)
|
||||
; select(mask, a1 * a0 + load, a1): the passthrough is a multiplicand and the
; addend comes from memory, so the expected output is one masked vfmadd213ps with
; a folded memory operand, then a move of %zmm1 into the return register.
; The default-prefix run is expected to build %k1 via vpmovsxbd/vpandd/vptestmd;
; the SKX-prefix run is expected to use vpmovb2m instead.
define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
; CHECK-LABEL: test_x86_fmadd213_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
;
; SKX-LABEL: test_x86_fmadd213_ps:
; SKX: ## BB#0:
; SKX-NEXT: vpmovb2m %xmm2, %k1
; SKX-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
  %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
  %x = fmul <16 x float> %a1, %a0
  %y = fadd <16 x float> %x, %a2
  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
  ret <16 x float> %res
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user