AVX-512: Added FMA instructions, intrinsics an tests for KNL and SKX targets

by Asaf Badouh

http://reviews.llvm.org/D6456



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224764 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2014-12-23 10:30:39 +00:00
parent 2bea947207
commit 1a637e9fc0
6 changed files with 677 additions and 81 deletions

View File

@ -2073,11 +2073,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddps128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
@ -2107,11 +2127,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmsubps128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubpd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmsubpd128_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
@ -2141,11 +2181,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfnmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmaddps128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfnmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmaddpd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfnmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmaddpd128_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
@ -2175,11 +2235,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfnmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfnmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfnmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
@ -2203,11 +2283,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmaddsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmaddsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmaddsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmaddsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmaddsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
@ -2231,11 +2331,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmsubadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubaddps256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmsubadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmsubaddps128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmsubadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmsubadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
llvm_i8_ty],
[IntrNoMem]>;
def int_x86_fma_mask_vfmsubadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd128_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty],
[IntrNoMem]>;
}
//===----------------------------------------------------------------------===//

View File

@ -17032,6 +17032,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
Op.getOperand(2));
}
case FMA_OP_MASK:
{
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
dl, Op.getValueType(),
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3)),
Op.getOperand(4), Op.getOperand(1),
Subtarget, DAG);
}
default:
break;
}

View File

@ -3508,61 +3508,58 @@ multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
AVX512FMA3Base;
let mayLoad = 1 in
def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, _.MemOp:$src3),
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2,
(_.MemOpFrag addr:$src3))))]>;
def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr, "\t{${src3}", _.BroadcastStr,
", $src2, $dst|$dst, $src2, ${src3}", _.BroadcastStr, "}"),
[(set _.RC:$dst, (OpNode _.RC:$src1, _.RC:$src2,
(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))]>, EVEX_B;
}
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src1, _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
}
} // Constraints = "$src1 = $dst"
multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
string OpcodeStr, X86VectorVTInfo VTI,
SDPatternOperator OpNode> {
defm v213 : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
VTI, OpNode>,
EVEX_V512, EVEX_CD8<VTI.EltSize, CD8VF>;
defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
defm v231 : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
VTI>,
EVEX_V512, EVEX_CD8<VTI.EltSize, CD8VF>;
defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
VTI>, EVEX_CD8<VTI.EltSize, CD8VF>;
}
multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231,
string OpcodeStr,
SDPatternOperator OpNode> {
let ExeDomain = SSEPackedSingle in {
defm VFMADDPSZ : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd",
v16f32_info, X86Fmadd>;
defm VFMSUBPSZ : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub",
v16f32_info, X86Fmsub>;
defm VFMADDSUBPSZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub",
v16f32_info, X86Fmaddsub>;
defm VFMSUBADDPSZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd",
v16f32_info, X86Fmsubadd>;
defm VFNMADDPSZ : avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd",
v16f32_info, X86Fnmadd>;
defm VFNMSUBPSZ : avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub",
v16f32_info, X86Fnmsub>;
}
defm NAME##PSZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
v16f32_info, OpNode>, EVEX_V512;
defm NAME##PSZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
v8f32x_info, OpNode>, EVEX_V256;
defm NAME##PSZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
v4f32x_info, OpNode>, EVEX_V128;
}
let ExeDomain = SSEPackedDouble in {
defm VFMADDPDZ : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd",
v8f64_info, X86Fmadd>, VEX_W;
defm VFMSUBPDZ : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub",
v8f64_info, X86Fmsub>, VEX_W;
defm VFMADDSUBPDZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub",
v8f64_info, X86Fmaddsub>, VEX_W;
defm VFMSUBADDPDZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd",
v8f64_info, X86Fmsubadd>, VEX_W;
defm VFNMADDPDZ : avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd",
v8f64_info, X86Fnmadd>, VEX_W;
defm VFNMSUBPDZ : avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub",
v8f64_info, X86Fnmsub>, VEX_W;
defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
v8f64_info, OpNode>, EVEX_V512, VEX_W;
defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
v4f64x_info, OpNode>, EVEX_V256, VEX_W;
defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
v2f64x_info, OpNode>, EVEX_V128, VEX_W;
}
}
defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd>;
defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub>;
defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub>;
defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd>;
defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd>;
defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub>;
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
@ -3584,47 +3581,36 @@ multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
} // Constraints = "$src1 = $dst"
multiclass avx512_fma3p_m132_f<bits<8> opc,
string OpcodeStr,
SDNode OpNode> {
let ExeDomain = SSEPackedSingle in {
defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", X86Fmadd,
v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", X86Fmsub,
v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", X86Fmaddsub,
v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", X86Fmsubadd,
v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", X86Fnmadd,
v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", X86Fnmsub,
v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
}
defm NAME##PSZ : avx512_fma3p_m132<opc, OpcodeStr##ps,
OpNode,v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm NAME##PSZ256 : avx512_fma3p_m132<opc, OpcodeStr##ps,
OpNode, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>;
defm NAME##PSZ128 : avx512_fma3p_m132<opc, OpcodeStr##ps,
OpNode, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>;
}
let ExeDomain = SSEPackedDouble in {
defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", X86Fmadd,
v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", X86Fmsub,
v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", X86Fmaddsub,
v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", X86Fmsubadd,
v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", X86Fnmadd,
v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", X86Fnmsub,
v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm NAME##PDZ : avx512_fma3p_m132<opc, OpcodeStr##pd,
OpNode, v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VF>;
defm NAME##PDZ256 : avx512_fma3p_m132<opc, OpcodeStr##pd,
OpNode, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<32, CD8VF>;
defm NAME##PDZ128 : avx512_fma3p_m132<opc, OpcodeStr##pd,
OpNode, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<32, CD8VF>;
}
}
defm VFMADD132 : avx512_fma3p_m132_f<0x98, "vfmadd132", X86Fmadd>;
defm VFMSUB132 : avx512_fma3p_m132_f<0x9A, "vfmsub132", X86Fmsub>;
defm VFMADDSUB132 : avx512_fma3p_m132_f<0x96, "vfmaddsub132", X86Fmaddsub>;
defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>;
defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>;
defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>;
// Scalar FMA
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,

View File

@ -21,7 +21,7 @@ enum IntrinsicType {
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_SCALAR_MASK_RM,
INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, FMA_OP_MASK, INTR_TYPE_SCALAR_MASK_RM,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, EXPAND_FROM_MEM, BLEND
};
@ -398,6 +398,30 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB , 0),
X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse2_comige_sd, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse2_comigt_sd, COMI, X86ISD::COMI, ISD::SETGT),

View File

@ -8,6 +8,13 @@ define <16 x float> @test_x86_vfmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <1
}
declare <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
define <16 x float> @test_mask_vfmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfmadd_ps
; CHECK: vfmadd213ps %zmm
%res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
ret <16 x float> %res
}
define <8 x double> @test_x86_vfmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfmadd_pd_z
; CHECK: vfmadd213pd %zmm
@ -32,6 +39,13 @@ define <16 x float> @test_x86_vfmsubps_z(<16 x float> %a0, <16 x float> %a1, <16
}
declare <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
define <16 x float> @test_mask_vfmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfmsub_ps
; CHECK: vfmsub213ps %zmm
%res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
ret <16 x float> %res
}
define <8 x double> @test_x86_vfmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfmsubpd_z
; CHECK: vfmsub213pd %zmm
@ -40,6 +54,13 @@ define <8 x double> @test_x86_vfmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8
}
declare <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
define <8 x double> @test_mask_vfmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsub_pd
; CHECK: vfmsub213pd %zmm
%res = call <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
ret <8 x double> %res
}
define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_ps_z
; CHECK: vfnmadd213ps %zmm
@ -48,6 +69,13 @@ define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <
}
declare <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_ps
; CHECK: vfnmadd213ps %zmm
%res = call <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
ret <16 x float> %res
}
define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_pd_z
; CHECK: vfnmadd213pd %zmm
@ -56,6 +84,13 @@ define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <
}
declare <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_pd
; CHECK: vfnmadd213pd %zmm
%res = call <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
ret <8 x double> %res
}
define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmsubps_z
; CHECK: vfnmsub213ps %zmm
@ -64,6 +99,13 @@ define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <1
}
declare <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_ps
; CHECK: vfnmsub213ps %zmm
%res = call <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
ret <16 x float> %res
}
define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmsubpd_z
; CHECK: vfnmsub213pd %zmm
@ -72,6 +114,13 @@ define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8
}
declare <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_pd
; CHECK: vfnmsub213pd %zmm
%res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
ret <8 x double> %res
}
define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubps_z
; CHECK: vfmaddsub213ps %zmm
@ -96,6 +145,13 @@ define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1,
}
declare <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub_pd
; CHECK: vfmaddsub213pd %zmm
%res = call <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
ret <8 x double> %res
}
define <16 x float> @test_x86_vfmsubaddps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfmsubaddps_z
; CHECK: vfmsubadd213ps %zmm
@ -104,6 +160,13 @@ define <16 x float> @test_x86_vfmsubaddps_z(<16 x float> %a0, <16 x float> %a1,
}
declare <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
define <16 x float> @test_mask_vfmsubadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfmsubadd_ps
; CHECK: vfmsubadd213ps %zmm
%res = call <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
ret <16 x float> %res
}
define <8 x double> @test_x86_vfmsubaddpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfmsubaddpd_z
; CHECK: vfmsubadd213pd %zmm
@ -111,3 +174,11 @@ define <8 x double> @test_x86_vfmsubaddpd_z(<8 x double> %a0, <8 x double> %a1,
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
define <8 x double> @test_mask_vfmsubadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubadd_pd
; CHECK: vfmsubadd213pd %zmm
%res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
ret <8 x double> %res
}

View File

@ -611,3 +611,388 @@ define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
}
declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
declare <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd256_ps
; CHECK: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa8,0xc2]
%res = call <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
ret <8 x float> %res
}
declare <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps
; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
declare <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
; CHECK-LABEL: test_mask_fmadd256_pd:
; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
%res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
ret <4 x double> %res
}
declare <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: test_mask_fmadd128_pd:
; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
%res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
ret <2 x double> %res
}
declare <8 x float> @llvm.x86.fma.mask.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
define <8 x float> @test_mask_vfmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsub256_ps
; CHECK: vfmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xaa,0xc2]
%res = call <8 x float> @llvm.x86.fma.mask.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
ret <8 x float> %res
}
declare <4 x float> @llvm.x86.fma.mask.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
define <4 x float> @test_mask_vfmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsub128_ps
; CHECK: vfmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaa,0xc2]
%res = call <4 x float> @llvm.x86.fma.mask.vfmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
declare <4 x double> @llvm.x86.fma.mask.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
define <4 x double> @test_mask_vfmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsub256_pd
; CHECK: vfmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xaa,0xc2]
%res = call <4 x double> @llvm.x86.fma.mask.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
ret <4 x double> %res
}
declare <2 x double> @llvm.x86.fma.mask.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
define <2 x double> @test_mask_vfmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsub128_pd
; CHECK: vfmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaa,0xc2]
%res = call <2 x double> @llvm.x86.fma.mask.vfmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
declare <8 x float> @llvm.x86.fma.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd256_ps
; CHECK: vfnmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xac,0xc2]
%res = call <8 x float> @llvm.x86.fma.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
ret <8 x float> %res
}
declare <4 x float> @llvm.x86.fma.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd128_ps
; CHECK: vfnmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xac,0xc2]
%res = call <4 x float> @llvm.x86.fma.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
declare <4 x double> @llvm.x86.fma.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd256_pd
; CHECK: vfnmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xac,0xc2]
%res = call <4 x double> @llvm.x86.fma.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
ret <4 x double> %res
}
declare <2 x double> @llvm.x86.fma.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd128_pd
; CHECK: vfnmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xac,0xc2]
%res = call <2 x double> @llvm.x86.fma.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
declare <8 x float> @llvm.x86.fma.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub256_ps
; CHECK: vfnmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xae,0xc2]
%res = call <8 x float> @llvm.x86.fma.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
ret <8 x float> %res
}
declare <4 x float> @llvm.x86.fma.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub128_ps
; CHECK: vfnmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xae,0xc2]
%res = call <4 x float> @llvm.x86.fma.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
declare <4 x double> @llvm.x86.fma.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub256_pd
; CHECK: vfnmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xae,0xc2]
%res = call <4 x double> @llvm.x86.fma.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
ret <4 x double> %res
}
declare <2 x double> @llvm.x86.fma.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub128_pd
; CHECK: vfnmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xae,0xc2]
%res = call <2 x double> @llvm.x86.fma.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
declare <8 x float> @llvm.x86.fma.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
; CHECK-LABEL: test_mask_fmaddsub256_ps:
; CHECK: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa6,0xc2]
%res = call <8 x float> @llvm.x86.fma.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
ret <8 x float> %res
}
declare <4 x float> @llvm.x86.fma.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: test_mask_fmaddsub128_ps:
; CHECK: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa6,0xc2]
%res = call <4 x float> @llvm.x86.fma.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
ret <4 x float> %res
}
declare <4 x double> @llvm.x86.fma.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub256_pd
; CHECK: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa6,0xc2]
%res = call <4 x double> @llvm.x86.fma.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
ret <4 x double> %res
}
declare <2 x double> @llvm.x86.fma.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub128_pd
; CHECK: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa6,0xc2]
%res = call <2 x double> @llvm.x86.fma.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
declare <8 x float> @llvm.x86.fma.mask.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
define <8 x float> @test_mask_vfmsubadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubadd256_ps
; CHECK: vfmsubadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa7,0xc2]
%res = call <8 x float> @llvm.x86.fma.mask.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
ret <8 x float> %res
}
declare <4 x float> @llvm.x86.fma.mask.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
define <4 x float> @test_mask_vfmsubadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubadd128_ps
; CHECK: vfmsubadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa7,0xc2]
%res = call <4 x float> @llvm.x86.fma.mask.vfmsubadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
declare <4 x double> @llvm.x86.fma.mask.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
define <4 x double> @test_mask_vfmsubadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubadd256_pd
; CHECK: vfmsubadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa7,0xc2]
%res = call <4 x double> @llvm.x86.fma.mask.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
ret <4 x double> %res
}
declare <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
define <2 x double> @test_mask_vfmsubadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubadd128_pd
; CHECK: vfmsubadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0xc2]
%res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
define <2 x double> @test_mask_vfmsubadd128rm_pd(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubadd128rm_pd
; CHECK: vfmsubadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0x07]
%a2 = load <2 x double>* %ptr_a2
%res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
define <8 x double> @test_mask_vfmsubaddrm_pd(<8 x double> %a0, <8 x double> %a1, <8 x double>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubaddrm_pd
; CHECK: vfmsubadd213pd (%rdi), %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa7,0x07]
%a2 = load <8 x double>* %ptr_a2, align 8
%res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
ret <8 x double> %res
}
define <4 x float> @test_mask_vfmadd128_ps_r(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_r
; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
define <4 x float> @test_mask_vfmadd128_ps_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rz
; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2]
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
ret <4 x float> %res
}
define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmk
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
%a2 = load <4 x float>* %ptr_a2
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmka
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
%a2 = load <4 x float>* %ptr_a2, align 8
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
%a2 = load <4 x float>* %ptr_a2
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
ret <4 x float> %res
}
define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
%a2 = load <4 x float>* %ptr_a2, align 4
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
ret <4 x float> %res
}
define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmb
; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
%q = load float* %ptr_a2
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
%vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
ret <4 x float> %res
}
define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmba
; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
%q = load float* %ptr_a2, align 4
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
%vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
ret <4 x float> %res
}
define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmbz
; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
%q = load float* %ptr_a2
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
%vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
ret <4 x float> %res
}
define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmbza
; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
%q = load float* %ptr_a2, align 4
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
%vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
ret <4 x float> %res
}
define <2 x double> @test_mask_vfmadd128_pd_r(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_pd_r
; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
%res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
define <2 x double> @test_mask_vfmadd128_pd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_mask_vfmadd128_pd_rz
; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2]
%res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
ret <2 x double> %res
}
define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_pd_rmk
; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
%a2 = load <2 x double>* %ptr_a2
%res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz
; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
%a2 = load <2 x double>* %ptr_a2
%res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
ret <2 x double> %res
}
define <4 x double> @test_mask_vfmadd256_pd_r(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd256_pd_r
; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
%res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
ret <4 x double> %res
}
define <4 x double> @test_mask_vfmadd256_pd_rz(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_mask_vfmadd256_pd_rz
; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2]
%res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
ret <4 x double> %res
}
define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd256_pd_rmk
; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
%a2 = load <4 x double>* %ptr_a2
%res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
ret <4 x double> %res
}
define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz
; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
%a2 = load <4 x double>* %ptr_a2
%res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
ret <4 x double> %res
}