mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-03-25 16:30:05 +00:00
AVX-512: Added FMA instructions, intrinsics and tests for KNL and SKX targets
by Asaf Badouh http://reviews.llvm.org/D6456 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224764 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2bea947207
commit
1a637e9fc0
@ -2073,11 +2073,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
@ -2107,11 +2127,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmsubps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmsubpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
@ -2141,11 +2181,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmaddps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmaddpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmaddpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
@ -2175,11 +2235,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfnmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
@ -2203,11 +2283,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmaddsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmaddsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmaddsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmaddsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmaddsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
@ -2231,11 +2331,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsubadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubaddps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsubadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmsubaddps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsubadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsubadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_mask_vfmsubadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -17032,6 +17032,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
|
||||
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
}
|
||||
case FMA_OP_MASK:
|
||||
{
|
||||
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
|
||||
dl, Op.getValueType(),
|
||||
Op.getOperand(1),
|
||||
Op.getOperand(2),
|
||||
Op.getOperand(3)),
|
||||
Op.getOperand(4), Op.getOperand(1),
|
||||
Subtarget, DAG);
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -3508,61 +3508,58 @@ multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
AVX512FMA3Base;
|
||||
|
||||
let mayLoad = 1 in
|
||||
def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2, _.MemOp:$src3),
|
||||
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2,
|
||||
(_.MemOpFrag addr:$src3))))]>;
|
||||
def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2, _.ScalarMemOp:$src3),
|
||||
!strconcat(OpcodeStr, "\t{${src3}", _.BroadcastStr,
|
||||
", $src2, $dst|$dst, $src2, ${src3}", _.BroadcastStr, "}"),
|
||||
[(set _.RC:$dst, (OpNode _.RC:$src1, _.RC:$src2,
|
||||
(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))]>, EVEX_B;
|
||||
}
|
||||
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.MemOp:$src3),
|
||||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
|
||||
AVX512FMA3Base;
|
||||
|
||||
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.ScalarMemOp:$src3),
|
||||
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ),
|
||||
(OpNode _.RC:$src1, _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
|
||||
AVX512FMA3Base, EVEX_B;
|
||||
}
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
|
||||
string OpcodeStr, X86VectorVTInfo VTI,
|
||||
SDPatternOperator OpNode> {
|
||||
defm v213 : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
|
||||
VTI, OpNode>,
|
||||
EVEX_V512, EVEX_CD8<VTI.EltSize, CD8VF>;
|
||||
defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
|
||||
VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
|
||||
|
||||
defm v231 : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
|
||||
VTI>,
|
||||
EVEX_V512, EVEX_CD8<VTI.EltSize, CD8VF>;
|
||||
defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
|
||||
VTI>, EVEX_CD8<VTI.EltSize, CD8VF>;
|
||||
}
|
||||
|
||||
multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231,
|
||||
string OpcodeStr,
|
||||
SDPatternOperator OpNode> {
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
defm VFMADDPSZ : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd",
|
||||
v16f32_info, X86Fmadd>;
|
||||
defm VFMSUBPSZ : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub",
|
||||
v16f32_info, X86Fmsub>;
|
||||
defm VFMADDSUBPSZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub",
|
||||
v16f32_info, X86Fmaddsub>;
|
||||
defm VFMSUBADDPSZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd",
|
||||
v16f32_info, X86Fmsubadd>;
|
||||
defm VFNMADDPSZ : avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd",
|
||||
v16f32_info, X86Fnmadd>;
|
||||
defm VFNMSUBPSZ : avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub",
|
||||
v16f32_info, X86Fnmsub>;
|
||||
}
|
||||
defm NAME##PSZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
|
||||
v16f32_info, OpNode>, EVEX_V512;
|
||||
defm NAME##PSZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
|
||||
v8f32x_info, OpNode>, EVEX_V256;
|
||||
defm NAME##PSZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
|
||||
v4f32x_info, OpNode>, EVEX_V128;
|
||||
}
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm VFMADDPDZ : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd",
|
||||
v8f64_info, X86Fmadd>, VEX_W;
|
||||
defm VFMSUBPDZ : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub",
|
||||
v8f64_info, X86Fmsub>, VEX_W;
|
||||
defm VFMADDSUBPDZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub",
|
||||
v8f64_info, X86Fmaddsub>, VEX_W;
|
||||
defm VFMSUBADDPDZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd",
|
||||
v8f64_info, X86Fmsubadd>, VEX_W;
|
||||
defm VFNMADDPDZ : avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd",
|
||||
v8f64_info, X86Fnmadd>, VEX_W;
|
||||
defm VFNMSUBPDZ : avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub",
|
||||
v8f64_info, X86Fnmsub>, VEX_W;
|
||||
defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
|
||||
v8f64_info, OpNode>, EVEX_V512, VEX_W;
|
||||
defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
|
||||
v4f64x_info, OpNode>, EVEX_V256, VEX_W;
|
||||
defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
|
||||
v2f64x_info, OpNode>, EVEX_V128, VEX_W;
|
||||
}
|
||||
}
|
||||
|
||||
defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd>;
|
||||
defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub>;
|
||||
defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub>;
|
||||
defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd>;
|
||||
defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd>;
|
||||
defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub>;
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86VectorVTInfo _> {
|
||||
@ -3584,47 +3581,36 @@ multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
|
||||
multiclass avx512_fma3p_m132_f<bits<8> opc,
|
||||
string OpcodeStr,
|
||||
SDNode OpNode> {
|
||||
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", X86Fmadd,
|
||||
v16f32_info>,
|
||||
EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", X86Fmsub,
|
||||
v16f32_info>,
|
||||
EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", X86Fmaddsub,
|
||||
v16f32_info>,
|
||||
EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", X86Fmsubadd,
|
||||
v16f32_info>,
|
||||
EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", X86Fnmadd,
|
||||
v16f32_info>,
|
||||
EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", X86Fnmsub,
|
||||
v16f32_info>,
|
||||
EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
}
|
||||
defm NAME##PSZ : avx512_fma3p_m132<opc, OpcodeStr##ps,
|
||||
OpNode,v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm NAME##PSZ256 : avx512_fma3p_m132<opc, OpcodeStr##ps,
|
||||
OpNode, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>;
|
||||
defm NAME##PSZ128 : avx512_fma3p_m132<opc, OpcodeStr##ps,
|
||||
OpNode, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>;
|
||||
}
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", X86Fmadd,
|
||||
v8f64_info>,
|
||||
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", X86Fmsub,
|
||||
v8f64_info>,
|
||||
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", X86Fmaddsub,
|
||||
v8f64_info>,
|
||||
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", X86Fmsubadd,
|
||||
v8f64_info>,
|
||||
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", X86Fnmadd,
|
||||
v8f64_info>,
|
||||
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", X86Fnmsub,
|
||||
v8f64_info>,
|
||||
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
// Packed-double "132" FMA forms for the 512/256/128-bit vector lengths.
// These are 64-bit-element (VEX_W) instructions, so the EVEX compressed
// displacement element size must be 64, matching the packed-double
// definitions these replace; the packed-single siblings use 32.
// NOTE(review): with CD8VF the element size is expected to matter for the
// disp8*N scale of embedded-broadcast memory operands - confirm against the
// encoder before relying on this.
defm NAME##PDZ    : avx512_fma3p_m132<opc, OpcodeStr##pd,
                      OpNode, v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm NAME##PDZ256 : avx512_fma3p_m132<opc, OpcodeStr##pd,
                      OpNode, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
defm NAME##PDZ128 : avx512_fma3p_m132<opc, OpcodeStr##pd,
                      OpNode, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
}
|
||||
|
||||
defm VFMADD132 : avx512_fma3p_m132_f<0x98, "vfmadd132", X86Fmadd>;
|
||||
defm VFMSUB132 : avx512_fma3p_m132_f<0x9A, "vfmsub132", X86Fmsub>;
|
||||
defm VFMADDSUB132 : avx512_fma3p_m132_f<0x96, "vfmaddsub132", X86Fmaddsub>;
|
||||
defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>;
|
||||
defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>;
|
||||
defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>;
|
||||
|
||||
|
||||
// Scalar FMA
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
|
@ -21,7 +21,7 @@ enum IntrinsicType {
|
||||
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
|
||||
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
|
||||
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
|
||||
INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_SCALAR_MASK_RM,
|
||||
INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, FMA_OP_MASK, INTR_TYPE_SCALAR_MASK_RM,
|
||||
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, EXPAND_FROM_MEM, BLEND
|
||||
};
|
||||
|
||||
@ -398,6 +398,30 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
|
||||
X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
|
||||
X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB , 0),
|
||||
X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ),
|
||||
X86_INTRINSIC_DATA(sse2_comige_sd, COMI, X86ISD::COMI, ISD::SETGE),
|
||||
X86_INTRINSIC_DATA(sse2_comigt_sd, COMI, X86ISD::COMI, ISD::SETGT),
|
||||
|
@ -8,6 +8,13 @@ define <16 x float> @test_x86_vfmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <1
|
||||
}
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_mask_vfmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfmadd_ps
|
||||
; CHECK: vfmadd213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_x86_vfmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmadd_pd_z
|
||||
; CHECK: vfmadd213pd %zmm
|
||||
@ -32,6 +39,13 @@ define <16 x float> @test_x86_vfmsubps_z(<16 x float> %a0, <16 x float> %a1, <16
|
||||
}
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_mask_vfmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfmsub_ps
|
||||
; CHECK: vfmsub213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_x86_vfmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmsubpd_z
|
||||
; CHECK: vfmsub213pd %zmm
|
||||
@ -40,6 +54,13 @@ define <8 x double> @test_x86_vfmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_mask_vfmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfmsub_pd
|
||||
; CHECK: vfmsub213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmadd_ps_z
|
||||
; CHECK: vfnmadd213ps %zmm
|
||||
@ -48,6 +69,13 @@ define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <
|
||||
}
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfnmadd_ps
|
||||
; CHECK: vfnmadd213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmadd_pd_z
|
||||
; CHECK: vfnmadd213pd %zmm
|
||||
@ -56,6 +84,13 @@ define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfnmadd_pd
|
||||
; CHECK: vfnmadd213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmsubps_z
|
||||
; CHECK: vfnmsub213ps %zmm
|
||||
@ -64,6 +99,13 @@ define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <1
|
||||
}
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfnmsub_ps
|
||||
; CHECK: vfnmsub213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmsubpd_z
|
||||
; CHECK: vfnmsub213pd %zmm
|
||||
@ -72,6 +114,13 @@ define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfnmsub_pd
|
||||
; CHECK: vfnmsub213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmaddsubps_z
|
||||
; CHECK: vfmaddsub213ps %zmm
|
||||
@ -96,6 +145,13 @@ define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1,
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfmaddsub_pd
|
||||
; CHECK: vfmaddsub213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vfmsubaddps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmsubaddps_z
|
||||
; CHECK: vfmsubadd213ps %zmm
|
||||
@ -104,6 +160,13 @@ define <16 x float> @test_x86_vfmsubaddps_z(<16 x float> %a0, <16 x float> %a1,
|
||||
}
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_mask_vfmsubadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfmsubadd_ps
|
||||
; CHECK: vfmsubadd213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <8 x double> @test_x86_vfmsubaddpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmsubaddpd_z
|
||||
; CHECK: vfmsubadd213pd %zmm
|
||||
@ -111,3 +174,11 @@ define <8 x double> @test_x86_vfmsubaddpd_z(<8 x double> %a0, <8 x double> %a1,
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_mask_vfmsubadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_vfmsubadd_pd
|
||||
; CHECK: vfmsubadd213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
|
||||
|
@ -611,3 +611,388 @@ define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
|
||||
|
||||
declare <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

; 256-bit masked vfmadd on packed floats: the runtime i8 %mask is handed to the
; intrinsic, and the expected vfmadd213ps uses ymm operands under {%k1}.
define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd256_ps:
; CHECK: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa8,0xc2]
  %res = call <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

; 128-bit masked vfmadd on packed floats: the runtime i8 %mask is handed to the
; intrinsic, and the expected vfmadd213ps uses xmm operands under {%k1}.
; The label ends in ':' so it cannot also match the later
; test_mask_vfmadd128_ps_r* functions, whose names share this prefix.
define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps:
; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
declare <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

; Register-register form of the 256-bit masked vfmadd on packed doubles.
; %mask arrives as a function argument, so the expected vfmadd213pd is
; predicated with {%k1} and uses ymm operands.
define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
; CHECK-LABEL: test_mask_fmadd256_pd:
; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
  %fma = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
  ret <4 x double> %fma
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

; Register-register form of the 128-bit masked vfmadd on packed doubles.
; %mask arrives as a function argument, so the expected vfmadd213pd is
; predicated with {%k1} and uses xmm operands.
define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: test_mask_fmadd128_pd:
; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
  %fma = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
  ret <2 x double> %fma
}
|
||||
|
||||
declare <8 x float> @llvm.x86.fma.mask.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

; 256-bit masked vfmsub on packed floats: runtime i8 %mask, expected
; vfmsub213ps on ymm operands under {%k1}.
define <8 x float> @test_mask_vfmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsub256_ps:
; CHECK: vfmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xaa,0xc2]
  %res = call <8 x float> @llvm.x86.fma.mask.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.mask.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

; 128-bit masked vfmsub on packed floats: runtime i8 %mask, expected
; vfmsub213ps on xmm operands under {%k1}.
define <4 x float> @test_mask_vfmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsub128_ps:
; CHECK: vfmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaa,0xc2]
  %res = call <4 x float> @llvm.x86.fma.mask.vfmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
declare <4 x double> @llvm.x86.fma.mask.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

; 256-bit masked vfmsub on packed doubles: runtime i8 %mask, expected
; vfmsub213pd on ymm operands under {%k1}.
define <4 x double> @test_mask_vfmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsub256_pd:
; CHECK: vfmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xaa,0xc2]
  %res = call <4 x double> @llvm.x86.fma.mask.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.mask.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

; 128-bit masked vfmsub on packed doubles: runtime i8 %mask, expected
; vfmsub213pd on xmm operands under {%k1}.
define <2 x double> @test_mask_vfmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsub128_pd:
; CHECK: vfmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaa,0xc2]
  %res = call <2 x double> @llvm.x86.fma.mask.vfmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}
|
||||
|
||||
declare <8 x float> @llvm.x86.fma.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

; 256-bit masked vfnmadd on packed floats: runtime i8 %mask, expected
; vfnmadd213ps on ymm operands under {%k1}.
define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd256_ps:
; CHECK: vfnmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xac,0xc2]
  %res = call <8 x float> @llvm.x86.fma.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

; 128-bit masked vfnmadd on packed floats: runtime i8 %mask, expected
; vfnmadd213ps on xmm operands under {%k1}.
define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd128_ps:
; CHECK: vfnmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xac,0xc2]
  %res = call <4 x float> @llvm.x86.fma.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
declare <4 x double> @llvm.x86.fma.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

; 256-bit masked vfnmadd on packed doubles: runtime i8 %mask, expected
; vfnmadd213pd on ymm operands under {%k1}.
define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd256_pd:
; CHECK: vfnmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xac,0xc2]
  %res = call <4 x double> @llvm.x86.fma.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

; 128-bit masked vfnmadd on packed doubles: runtime i8 %mask, expected
; vfnmadd213pd on xmm operands under {%k1}.
define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd128_pd:
; CHECK: vfnmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xac,0xc2]
  %res = call <2 x double> @llvm.x86.fma.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}
|
||||
|
||||
declare <8 x float> @llvm.x86.fma.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

; 256-bit masked vfnmsub on packed floats: runtime i8 %mask, expected
; vfnmsub213ps on ymm operands under {%k1}.
define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub256_ps:
; CHECK: vfnmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xae,0xc2]
  %res = call <8 x float> @llvm.x86.fma.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

; 128-bit masked vfnmsub on packed floats: runtime i8 %mask, expected
; vfnmsub213ps on xmm operands under {%k1}.
define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub128_ps:
; CHECK: vfnmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xae,0xc2]
  %res = call <4 x float> @llvm.x86.fma.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
declare <4 x double> @llvm.x86.fma.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

; 256-bit masked vfnmsub on packed doubles: runtime i8 %mask, expected
; vfnmsub213pd on ymm operands under {%k1}.
define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub256_pd:
; CHECK: vfnmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xae,0xc2]
  %res = call <4 x double> @llvm.x86.fma.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

; 128-bit masked vfnmsub on packed doubles: runtime i8 %mask, expected
; vfnmsub213pd on xmm operands under {%k1}.
define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub128_pd:
; CHECK: vfnmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xae,0xc2]
  %res = call <2 x double> @llvm.x86.fma.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}
|
||||
|
||||
declare <8 x float> @llvm.x86.fma.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

; Register-register form of the 256-bit masked vfmaddsub on packed floats.
; %mask arrives as a function argument, so the expected vfmaddsub213ps is
; predicated with {%k1} and uses ymm operands.
define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
; CHECK-LABEL: test_mask_fmaddsub256_ps:
; CHECK: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa6,0xc2]
  %fmaddsub = call <8 x float> @llvm.x86.fma.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
  ret <8 x float> %fmaddsub
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

; Register-register form of the 128-bit masked vfmaddsub on packed floats.
; %mask arrives as a function argument, so the expected vfmaddsub213ps is
; predicated with {%k1} and uses xmm operands.
define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: test_mask_fmaddsub128_ps:
; CHECK: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa6,0xc2]
  %fmaddsub = call <4 x float> @llvm.x86.fma.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
  ret <4 x float> %fmaddsub
}
|
||||
|
||||
declare <4 x double> @llvm.x86.fma.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

; 256-bit masked vfmaddsub on packed doubles: runtime i8 %mask, expected
; vfmaddsub213pd on ymm operands under {%k1}.
define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub256_pd:
; CHECK: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa6,0xc2]
  %res = call <4 x double> @llvm.x86.fma.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

; 128-bit masked vfmaddsub on packed doubles: runtime i8 %mask, expected
; vfmaddsub213pd on xmm operands under {%k1}.
define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub128_pd:
; CHECK: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa6,0xc2]
  %res = call <2 x double> @llvm.x86.fma.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}
|
||||
|
||||
declare <8 x float> @llvm.x86.fma.mask.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

; 256-bit masked vfmsubadd on packed floats: runtime i8 %mask, expected
; vfmsubadd213ps on ymm operands under {%k1}.
define <8 x float> @test_mask_vfmsubadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubadd256_ps:
; CHECK: vfmsubadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa7,0xc2]
  %res = call <8 x float> @llvm.x86.fma.mask.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.mask.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

; 128-bit masked vfmsubadd on packed floats: runtime i8 %mask, expected
; vfmsubadd213ps on xmm operands under {%k1}.
define <4 x float> @test_mask_vfmsubadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubadd128_ps:
; CHECK: vfmsubadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa7,0xc2]
  %res = call <4 x float> @llvm.x86.fma.mask.vfmsubadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
declare <4 x double> @llvm.x86.fma.mask.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

; 256-bit masked vfmsubadd on packed doubles: runtime i8 %mask, expected
; vfmsubadd213pd on ymm operands under {%k1}.
define <4 x double> @test_mask_vfmsubadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubadd256_pd:
; CHECK: vfmsubadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa7,0xc2]
  %res = call <4 x double> @llvm.x86.fma.mask.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}
|
||||
declare <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

; 128-bit masked vfmsubadd on packed doubles: runtime i8 %mask, expected
; vfmsubadd213pd on xmm operands under {%k1}.
define <2 x double> @test_mask_vfmsubadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubadd128_pd:
; CHECK: vfmsubadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0xc2]
  %res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}
|
||||
|
||||
; Memory-operand form of the 128-bit masked vfmsubadd on packed doubles: the
; third operand is loaded from %ptr_a2 (no explicit alignment), and the load is
; expected to fold into the instruction as (%rdi) under {%k1}.
define <2 x double> @test_mask_vfmsubadd128rm_pd(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubadd128rm_pd:
; CHECK: vfmsubadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0x07]
  %a2 = load <2 x double>* %ptr_a2
  %res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

; Memory-operand form of the 512-bit masked vfmsubadd on packed doubles: the
; third operand is loaded from %ptr_a2 with align 8, the last intrinsic operand
; is the constant i32 4, and the load is expected to fold as (%rdi) under {%k1}.
define <8 x double> @test_mask_vfmsubaddrm_pd(<8 x double> %a0, <8 x double> %a1, <8 x double>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubaddrm_pd:
; CHECK: vfmsubadd213pd (%rdi), %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa7,0x07]
  %a2 = load <8 x double>* %ptr_a2, align 8
  %res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}
|
||||
|
||||
; Register form with a runtime mask: expected vfmadd213ps on xmm operands
; under {%k1}. The label ends in ':' so it cannot also match the _rz, _rmk*
; and _rmb* tests, whose names share this prefix.
define <4 x float> @test_mask_vfmadd128_ps_r(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_r:
; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
; Register form with the constant mask i8 -1: the expected vfmadd213ps carries
; no {%k} write-mask annotation.
define <4 x float> @test_mask_vfmadd128_ps_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rz:
; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2]
  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
; Memory form with a runtime mask: the third operand is loaded from %ptr_a2
; (no explicit alignment) and is expected to fold as (%rdi) under {%k1}.
; The label ends in ':' so it cannot also match the _rmka, _rmkz and _rmkza
; tests, whose names share this prefix.
define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmk:
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
  %a2 = load <4 x float>* %ptr_a2
  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
; Same as the _rmk variant but the vector load carries an explicit align 8;
; the load is still expected to fold as (%rdi) under {%k1}.
define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmka:
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
  %a2 = load <4 x float>* %ptr_a2, align 8
  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
; Memory form with the constant mask i8 -1: no {%k} annotation is expected,
; and the expected encoding begins with 0xc4 instead of the 0x62 seen in the
; masked variants. The label ends in ':' so it cannot also match
; test_mask_vfmadd128_ps_rmkza.
define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz:
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
  %a2 = load <4 x float>* %ptr_a2
  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
; Same as the _rmkz variant but the vector load carries an explicit align 4;
; the expected instruction and encoding are unchanged.
define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza:
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
  %a2 = load <4 x float>* %ptr_a2, align 4
  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
; Broadcast form with a runtime mask: one float is loaded from %ptr_a2 and
; inserted into all four lanes; the splat is expected to fold into a
; (%rdi){1to4} memory operand under {%k1}. The label ends in ':' so it cannot
; also match the _rmba, _rmbz and _rmbza tests, whose names share this prefix.
define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmb:
; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
  %q = load float* %ptr_a2
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
; Same as the _rmb variant but the scalar load carries an explicit align 4;
; the splat is still expected to fold into (%rdi){1to4} under {%k1}.
define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmba:
; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
  %q = load float* %ptr_a2, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
; Broadcast form with the constant mask i8 -1: the four-lane splat of the
; loaded float is expected to fold into (%rdi){1to4} with no {%k} annotation.
; The label ends in ':' so it cannot also match test_mask_vfmadd128_ps_rmbza.
define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmbz:
; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
  %q = load float* %ptr_a2
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
; Same as the _rmbz variant but the scalar load carries an explicit align 4;
; the expected instruction and encoding are unchanged.
define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmbza:
; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
  %q = load float* %ptr_a2, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
  ret <4 x float> %res
}
|
||||
|
||||
; Register form with a runtime mask on packed doubles: expected vfmadd213pd on
; xmm operands under {%k1}. The label ends in ':' so it cannot also match the
; _rz, _rmk and _rmkz tests, whose names share this prefix.
define <2 x double> @test_mask_vfmadd128_pd_r(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_pd_r:
; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
  %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}
|
||||
|
||||
; Register form with the constant mask i8 -1 on packed doubles: the expected
; vfmadd213pd carries no {%k} write-mask annotation.
define <2 x double> @test_mask_vfmadd128_pd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_mask_vfmadd128_pd_rz:
; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2]
  %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
  ret <2 x double> %res
}
|
||||
|
||||
; Memory form with a runtime mask on packed doubles: the third operand is
; loaded from %ptr_a2 and is expected to fold as (%rdi) under {%k1}.
; The label ends in ':' so it cannot also match test_mask_vfmadd128_pd_rmkz.
define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_pd_rmk:
; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
  %a2 = load <2 x double>* %ptr_a2
  %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}
|
||||
|
||||
; Memory form with the constant mask i8 -1 on packed doubles: no {%k}
; annotation is expected, and the expected encoding begins with 0xc4 instead
; of the 0x62 seen in the masked variants.
define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz:
; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
  %a2 = load <2 x double>* %ptr_a2
  %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
  ret <2 x double> %res
}
|
||||
|
||||
; 256-bit register form with a runtime mask on packed doubles: expected
; vfmadd213pd on ymm operands under {%k1}. The label ends in ':' so it cannot
; also match the _rz, _rmk and _rmkz tests, whose names share this prefix.
define <4 x double> @test_mask_vfmadd256_pd_r(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd256_pd_r:
; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
  %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}
|
||||
|
||||
; 256-bit register form with the constant mask i8 -1 on packed doubles: the
; expected vfmadd213pd carries no {%k} write-mask annotation.
define <4 x double> @test_mask_vfmadd256_pd_rz(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_mask_vfmadd256_pd_rz:
; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2]
  %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
  ret <4 x double> %res
}
|
||||
|
||||
; 256-bit memory form with a runtime mask on packed doubles: the third operand
; is loaded from %ptr_a2 and is expected to fold as (%rdi) under {%k1}.
; The label ends in ':' so it cannot also match test_mask_vfmadd256_pd_rmkz.
define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd256_pd_rmk:
; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
  %a2 = load <4 x double>* %ptr_a2
  %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}
|
||||
|
||||
; 256-bit memory form with the constant mask i8 -1 on packed doubles: no {%k}
; annotation is expected, and the expected encoding begins with 0xc4 instead
; of the 0x62 seen in the masked variants.
define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz:
; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
  %a2 = load <4 x double>* %ptr_a2
  %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
  ret <4 x double> %res
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user