Add support for encoding all FMA4 instructions, and add TableGen patterns for all remaining FMA4 instructions and intrinsics, with tests.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145525 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jan Sjödin 2011-11-30 22:09:42 +00:00
parent 3dad610aaa
commit dd649e35e5
4 changed files with 1090 additions and 1 deletions

View File

@ -1825,10 +1825,138 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// FMA4
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  // Every FMA4 intrinsic below takes three vector operands of the same type
  // as its result and is marked IntrNoMem.

  // vfmadd: scalar (ss/sd), 128-bit packed (ps/pd), 256-bit packed (*_256).
  def int_x86_fma4_vfmadd_ss :
      GCCBuiltin<"__builtin_ia32_vfmaddss">,
      Intrinsic<[llvm_v4f32_ty],
                [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmadd_sd :
      GCCBuiltin<"__builtin_ia32_vfmaddsd">,
      Intrinsic<[llvm_v2f64_ty],
                [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmadd_ps :
      GCCBuiltin<"__builtin_ia32_vfmaddps">,
      Intrinsic<[llvm_v4f32_ty],
                [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmadd_pd :
      GCCBuiltin<"__builtin_ia32_vfmaddpd">,
      Intrinsic<[llvm_v2f64_ty],
                [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmadd_ps_256 :
      GCCBuiltin<"__builtin_ia32_vfmaddps256">,
      Intrinsic<[llvm_v8f32_ty],
                [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmadd_pd_256 :
      GCCBuiltin<"__builtin_ia32_vfmaddpd256">,
      Intrinsic<[llvm_v4f64_ty],
                [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;

  // vfmsub
  def int_x86_fma4_vfmsub_ss :
      GCCBuiltin<"__builtin_ia32_vfmsubss">,
      Intrinsic<[llvm_v4f32_ty],
                [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmsub_sd :
      GCCBuiltin<"__builtin_ia32_vfmsubsd">,
      Intrinsic<[llvm_v2f64_ty],
                [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmsub_ps :
      GCCBuiltin<"__builtin_ia32_vfmsubps">,
      Intrinsic<[llvm_v4f32_ty],
                [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmsub_pd :
      GCCBuiltin<"__builtin_ia32_vfmsubpd">,
      Intrinsic<[llvm_v2f64_ty],
                [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmsub_ps_256 :
      GCCBuiltin<"__builtin_ia32_vfmsubps256">,
      Intrinsic<[llvm_v8f32_ty],
                [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmsub_pd_256 :
      GCCBuiltin<"__builtin_ia32_vfmsubpd256">,
      Intrinsic<[llvm_v4f64_ty],
                [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;

  // vfnmadd
  def int_x86_fma4_vfnmadd_ss :
      GCCBuiltin<"__builtin_ia32_vfnmaddss">,
      Intrinsic<[llvm_v4f32_ty],
                [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfnmadd_sd :
      GCCBuiltin<"__builtin_ia32_vfnmaddsd">,
      Intrinsic<[llvm_v2f64_ty],
                [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_fma4_vfnmadd_ps :
      GCCBuiltin<"__builtin_ia32_vfnmaddps">,
      Intrinsic<[llvm_v4f32_ty],
                [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfnmadd_pd :
      GCCBuiltin<"__builtin_ia32_vfnmaddpd">,
      Intrinsic<[llvm_v2f64_ty],
                [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_fma4_vfnmadd_ps_256 :
      GCCBuiltin<"__builtin_ia32_vfnmaddps256">,
      Intrinsic<[llvm_v8f32_ty],
                [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfnmadd_pd_256 :
      GCCBuiltin<"__builtin_ia32_vfnmaddpd256">,
      Intrinsic<[llvm_v4f64_ty],
                [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;

  // vfnmsub
  def int_x86_fma4_vfnmsub_ss :
      GCCBuiltin<"__builtin_ia32_vfnmsubss">,
      Intrinsic<[llvm_v4f32_ty],
                [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfnmsub_sd :
      GCCBuiltin<"__builtin_ia32_vfnmsubsd">,
      Intrinsic<[llvm_v2f64_ty],
                [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_fma4_vfnmsub_ps :
      GCCBuiltin<"__builtin_ia32_vfnmsubps">,
      Intrinsic<[llvm_v4f32_ty],
                [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfnmsub_pd :
      GCCBuiltin<"__builtin_ia32_vfnmsubpd">,
      Intrinsic<[llvm_v2f64_ty],
                [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_fma4_vfnmsub_ps_256 :
      GCCBuiltin<"__builtin_ia32_vfnmsubps256">,
      Intrinsic<[llvm_v8f32_ty],
                [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfnmsub_pd_256 :
      GCCBuiltin<"__builtin_ia32_vfnmsubpd256">,
      Intrinsic<[llvm_v4f64_ty],
                [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;

  // vfmaddsub: packed forms only.
  def int_x86_fma4_vfmaddsub_ps :
      GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
      Intrinsic<[llvm_v4f32_ty],
                [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmaddsub_pd :
      GCCBuiltin<"__builtin_ia32_vfmaddsubpd">,
      Intrinsic<[llvm_v2f64_ty],
                [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmaddsub_ps_256 :
      GCCBuiltin<"__builtin_ia32_vfmaddsubps256">,
      Intrinsic<[llvm_v8f32_ty],
                [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmaddsub_pd_256 :
      GCCBuiltin<"__builtin_ia32_vfmaddsubpd256">,
      Intrinsic<[llvm_v4f64_ty],
                [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;

  // vfmsubadd: packed forms only.
  def int_x86_fma4_vfmsubadd_ps :
      GCCBuiltin<"__builtin_ia32_vfmsubaddps">,
      Intrinsic<[llvm_v4f32_ty],
                [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmsubadd_pd :
      GCCBuiltin<"__builtin_ia32_vfmsubaddpd">,
      Intrinsic<[llvm_v2f64_ty],
                [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmsubadd_ps_256 :
      GCCBuiltin<"__builtin_ia32_vfmsubaddps256">,
      Intrinsic<[llvm_v8f32_ty],
                [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
  def int_x86_fma4_vfmsubadd_pd_256 :
      GCCBuiltin<"__builtin_ia32_vfmsubaddpd256">,
      Intrinsic<[llvm_v4f64_ty],
                [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//

View File

@ -83,12 +83,74 @@ multiclass fma4s<bits<8> opc, string OpcodeStr> {
}
// Packed FMA4 instruction forms for one opcode/mnemonic pair.
//
// Six records are produced: 128-bit (rr, rm, mr) and 256-bit (rrY, rmY, mrY).
//   rr / rrY : all three sources in registers
//   rm / rmY : $src3 comes from memory
//   mr / mrY : $src2 comes from memory
// The rr and rm forms carry XOP_W; the mr forms do not.
//
// Every form prints its sources in the same order: "$src3, $src2, $src1, $dst"
// in AT&T syntax and "$dst, $src1, $src2, $src3" in Intel syntax. The register
// forms must use the same order as the memory forms, otherwise the assembler
// binds the first two written registers to the wrong operands and the encoding
// no longer matches the memory forms or the llvm-mc encoding tests.
multiclass fma4p<bits<8> opc, string OpcodeStr> {
  def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, VR128:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           []>, XOP_W;
  def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, f128mem:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           []>, XOP_W;
  def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, f128mem:$src2, VR128:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           []>;
  def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
           (ins VR256:$src1, VR256:$src2, VR256:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           []>, XOP_W;
  def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
           (ins VR256:$src1, VR256:$src2, f256mem:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           []>, XOP_W;
  def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
           (ins VR256:$src1, f256mem:$src2, VR256:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           []>;
}
// Instantiate the scalar (fma4s) and packed (fma4p) forms for every FMA4
// mnemonic. All of them are marked isAsmParserOnly.
let isAsmParserOnly = 1 in {
  // vfmadd
  defm VFMADDSS4    : fma4s<0x6A, "vfmaddss">;
  defm VFMADDSD4    : fma4s<0x6B, "vfmaddsd">;
  defm VFMADDPS4    : fma4p<0x68, "vfmaddps">;
  defm VFMADDPD4    : fma4p<0x69, "vfmaddpd">;
  // vfmsub
  defm VFMSUBSS4    : fma4s<0x6E, "vfmsubss">;
  defm VFMSUBSD4    : fma4s<0x6F, "vfmsubsd">;
  defm VFMSUBPS4    : fma4p<0x6C, "vfmsubps">;
  defm VFMSUBPD4    : fma4p<0x6D, "vfmsubpd">;
  // vfnmadd
  defm VFNMADDSS4   : fma4s<0x7A, "vfnmaddss">;
  defm VFNMADDSD4   : fma4s<0x7B, "vfnmaddsd">;
  defm VFNMADDPS4   : fma4p<0x78, "vfnmaddps">;
  defm VFNMADDPD4   : fma4p<0x79, "vfnmaddpd">;
  // vfnmsub
  defm VFNMSUBSS4   : fma4s<0x7E, "vfnmsubss">;
  defm VFNMSUBSD4   : fma4s<0x7F, "vfnmsubsd">;
  defm VFNMSUBPS4   : fma4p<0x7C, "vfnmsubps">;
  defm VFNMSUBPD4   : fma4p<0x7D, "vfnmsubpd">;
  // vfmaddsub / vfmsubadd (packed only)
  defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps">;
  defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd">;
  defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps">;
  defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd">;
}
// FMA4 intrinsic selection patterns.
// Each intrinsic gets three patterns: all-register (rr), third source loaded
// from memory (rm), and second source loaded from memory (mr).

// VFMADD
def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2, VR128:$src3),
          (VFMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2,
            (alignedloadv4f32 addr:$src3)),
          (VFMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1,
            (alignedloadv4f32 addr:$src2), VR128:$src3),
          (VFMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;

def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, VR128:$src3),
          (VFMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2,
@ -97,3 +159,290 @@ def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2,
// vfmaddsd with the second source loaded from memory.
def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1,
            (alignedloadv2f64 addr:$src2), VR128:$src3),
          (VFMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;

// 128-bit packed vfmadd.
def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, VR128:$src2, VR128:$src3),
          (VFMADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, VR128:$src2,
            (alignedloadv4f32 addr:$src3)),
          (VFMADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1,
            (alignedloadv4f32 addr:$src2), VR128:$src3),
          (VFMADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;

def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, VR128:$src2, VR128:$src3),
          (VFMADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, VR128:$src2,
            (alignedloadv2f64 addr:$src3)),
          (VFMADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1,
            (alignedloadv2f64 addr:$src2), VR128:$src3),
          (VFMADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;

// 256-bit packed vfmadd.
def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
          (VFMADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, VR256:$src2,
            (alignedloadv8f32 addr:$src3)),
          (VFMADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1,
            (alignedloadv8f32 addr:$src2), VR256:$src3),
          (VFMADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;

def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
          (VFMADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, VR256:$src2,
            (alignedloadv4f64 addr:$src3)),
          (VFMADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1,
            (alignedloadv4f64 addr:$src2), VR256:$src3),
          (VFMADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
// VFMSUB
// Scalar forms.
def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, VR128:$src2, VR128:$src3),
          (VFMSUBSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, VR128:$src2,
            (alignedloadv4f32 addr:$src3)),
          (VFMSUBSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1,
            (alignedloadv4f32 addr:$src2), VR128:$src3),
          (VFMSUBSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;

def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, VR128:$src2, VR128:$src3),
          (VFMSUBSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, VR128:$src2,
            (alignedloadv2f64 addr:$src3)),
          (VFMSUBSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1,
            (alignedloadv2f64 addr:$src2), VR128:$src3),
          (VFMSUBSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;

// 128-bit packed forms.
def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, VR128:$src2, VR128:$src3),
          (VFMSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, VR128:$src2,
            (alignedloadv4f32 addr:$src3)),
          (VFMSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1,
            (alignedloadv4f32 addr:$src2), VR128:$src3),
          (VFMSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;

def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, VR128:$src2, VR128:$src3),
          (VFMSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, VR128:$src2,
            (alignedloadv2f64 addr:$src3)),
          (VFMSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1,
            (alignedloadv2f64 addr:$src2), VR128:$src3),
          (VFMSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;

// 256-bit packed forms.
def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
          (VFMSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, VR256:$src2,
            (alignedloadv8f32 addr:$src3)),
          (VFMSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1,
            (alignedloadv8f32 addr:$src2), VR256:$src3),
          (VFMSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;

def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
          (VFMSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, VR256:$src2,
            (alignedloadv4f64 addr:$src3)),
          (VFMSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1,
            (alignedloadv4f64 addr:$src2), VR256:$src3),
          (VFMSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
// VFNMADD
// Scalar forms.
def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, VR128:$src2, VR128:$src3),
          (VFNMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, VR128:$src2,
            (alignedloadv4f32 addr:$src3)),
          (VFNMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1,
            (alignedloadv4f32 addr:$src2), VR128:$src3),
          (VFNMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;

def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, VR128:$src2, VR128:$src3),
          (VFNMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, VR128:$src2,
            (alignedloadv2f64 addr:$src3)),
          (VFNMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1,
            (alignedloadv2f64 addr:$src2), VR128:$src3),
          (VFNMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;

// 128-bit packed forms.
def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, VR128:$src2, VR128:$src3),
          (VFNMADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, VR128:$src2,
            (alignedloadv4f32 addr:$src3)),
          (VFNMADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1,
            (alignedloadv4f32 addr:$src2), VR128:$src3),
          (VFNMADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;

def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, VR128:$src2, VR128:$src3),
          (VFNMADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, VR128:$src2,
            (alignedloadv2f64 addr:$src3)),
          (VFNMADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1,
            (alignedloadv2f64 addr:$src2), VR128:$src3),
          (VFNMADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;

// 256-bit packed forms.
def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
          (VFNMADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, VR256:$src2,
            (alignedloadv8f32 addr:$src3)),
          (VFNMADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1,
            (alignedloadv8f32 addr:$src2), VR256:$src3),
          (VFNMADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;

def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
          (VFNMADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, VR256:$src2,
            (alignedloadv4f64 addr:$src3)),
          (VFNMADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1,
            (alignedloadv4f64 addr:$src2), VR256:$src3),
          (VFNMADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
// VFNMSUB
// Scalar forms.
def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, VR128:$src2, VR128:$src3),
          (VFNMSUBSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, VR128:$src2,
            (alignedloadv4f32 addr:$src3)),
          (VFNMSUBSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1,
            (alignedloadv4f32 addr:$src2), VR128:$src3),
          (VFNMSUBSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;

def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, VR128:$src2, VR128:$src3),
          (VFNMSUBSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, VR128:$src2,
            (alignedloadv2f64 addr:$src3)),
          (VFNMSUBSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1,
            (alignedloadv2f64 addr:$src2), VR128:$src3),
          (VFNMSUBSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;

// 128-bit packed forms.
def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, VR128:$src2, VR128:$src3),
          (VFNMSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, VR128:$src2,
            (alignedloadv4f32 addr:$src3)),
          (VFNMSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1,
            (alignedloadv4f32 addr:$src2), VR128:$src3),
          (VFNMSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;

def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, VR128:$src2, VR128:$src3),
          (VFNMSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, VR128:$src2,
            (alignedloadv2f64 addr:$src3)),
          (VFNMSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1,
            (alignedloadv2f64 addr:$src2), VR128:$src3),
          (VFNMSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;

// 256-bit packed forms.
def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
          (VFNMSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, VR256:$src2,
            (alignedloadv8f32 addr:$src3)),
          (VFNMSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1,
            (alignedloadv8f32 addr:$src2), VR256:$src3),
          (VFNMSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;

def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
          (VFNMSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, VR256:$src2,
            (alignedloadv4f64 addr:$src3)),
          (VFNMSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1,
            (alignedloadv4f64 addr:$src2), VR256:$src3),
          (VFNMSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
// VFMADDSUB (packed forms only)
// 128-bit.
def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, VR128:$src2, VR128:$src3),
          (VFMADDSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, VR128:$src2,
            (alignedloadv4f32 addr:$src3)),
          (VFMADDSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1,
            (alignedloadv4f32 addr:$src2), VR128:$src3),
          (VFMADDSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;

def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, VR128:$src2, VR128:$src3),
          (VFMADDSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, VR128:$src2,
            (alignedloadv2f64 addr:$src3)),
          (VFMADDSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1,
            (alignedloadv2f64 addr:$src2), VR128:$src3),
          (VFMADDSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;

// 256-bit.
def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, VR256:$src2,
            VR256:$src3),
          (VFMADDSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, VR256:$src2,
            (alignedloadv8f32 addr:$src3)),
          (VFMADDSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1,
            (alignedloadv8f32 addr:$src2), VR256:$src3),
          (VFMADDSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;

def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, VR256:$src2,
            VR256:$src3),
          (VFMADDSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, VR256:$src2,
            (alignedloadv4f64 addr:$src3)),
          (VFMADDSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1,
            (alignedloadv4f64 addr:$src2), VR256:$src3),
          (VFMADDSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
// VFMSUBADD (packed forms only)
// 128-bit.
def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, VR128:$src2, VR128:$src3),
          (VFMSUBADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, VR128:$src2,
            (alignedloadv4f32 addr:$src3)),
          (VFMSUBADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1,
            (alignedloadv4f32 addr:$src2), VR128:$src3),
          (VFMSUBADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;

def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, VR128:$src2, VR128:$src3),
          (VFMSUBADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, VR128:$src2,
            (alignedloadv2f64 addr:$src3)),
          (VFMSUBADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1,
            (alignedloadv2f64 addr:$src2), VR128:$src3),
          (VFMSUBADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;

// 256-bit.
def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, VR256:$src2,
            VR256:$src3),
          (VFMSUBADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, VR256:$src2,
            (alignedloadv8f32 addr:$src3)),
          (VFMSUBADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1,
            (alignedloadv8f32 addr:$src2), VR256:$src3),
          (VFMSUBADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;

def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, VR256:$src2,
            VR256:$src3),
          (VFMSUBADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, VR256:$src2,
            (alignedloadv4f64 addr:$src3)),
          (VFMSUBADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1,
            (alignedloadv4f64 addr:$src2), VR256:$src3),
          (VFMSUBADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;

View File

@ -1,4 +1,12 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=fma4 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s
; VFMADD
; Calls llvm.x86.fma4.vfmadd.ss on three register arguments and expects a
; vfmaddss instruction in the output.
define <4 x float> @test_x86_fma4_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vfmaddss
  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
; CHECK: vfmaddsd
@ -7,3 +15,229 @@ define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double
}
declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
; Packed vfmadd: 128-bit ps/pd, then 256-bit ps/pd. The 256-bit tests also
; expect a ymm register to appear in the output.
define <4 x float> @test_x86_fma4_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vfmaddps
  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma4.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_fma4_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vfmaddpd
  %res = call <2 x double> @llvm.x86.fma4.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma4.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <8 x float> @test_x86_fma4_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK: vfmaddps
  ; CHECK: ymm
  %res = call <8 x float> @llvm.x86.fma4.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma4.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

define <4 x double> @test_x86_fma4_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK: vfmaddpd
  ; CHECK: ymm
  %res = call <4 x double> @llvm.x86.fma4.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma4.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; VFMSUB
; One test per intrinsic; the 256-bit tests also expect a ymm register.
define <4 x float> @test_x86_fma4_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vfmsubss
  %res = call <4 x float> @llvm.x86.fma4.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma4.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_fma4_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vfmsubsd
  %res = call <2 x double> @llvm.x86.fma4.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma4.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_x86_fma4_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vfmsubps
  %res = call <4 x float> @llvm.x86.fma4.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma4.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_fma4_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vfmsubpd
  %res = call <2 x double> @llvm.x86.fma4.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma4.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <8 x float> @test_x86_fma4_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK: vfmsubps
  ; CHECK: ymm
  %res = call <8 x float> @llvm.x86.fma4.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma4.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

define <4 x double> @test_x86_fma4_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK: vfmsubpd
  ; CHECK: ymm
  %res = call <4 x double> @llvm.x86.fma4.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma4.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; VFNMADD
; One test per intrinsic; the 256-bit tests also expect a ymm register.
define <4 x float> @test_x86_fma4_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vfnmaddss
  %res = call <4 x float> @llvm.x86.fma4.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma4.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_fma4_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vfnmaddsd
  %res = call <2 x double> @llvm.x86.fma4.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma4.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_x86_fma4_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vfnmaddps
  %res = call <4 x float> @llvm.x86.fma4.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma4.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_fma4_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vfnmaddpd
  %res = call <2 x double> @llvm.x86.fma4.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma4.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <8 x float> @test_x86_fma4_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK: vfnmaddps
  ; CHECK: ymm
  %res = call <8 x float> @llvm.x86.fma4.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma4.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

define <4 x double> @test_x86_fma4_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK: vfnmaddpd
  ; CHECK: ymm
  %res = call <4 x double> @llvm.x86.fma4.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma4.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; VFNMSUB
; One test per intrinsic; the 256-bit tests also expect a ymm register.
define <4 x float> @test_x86_fma4_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vfnmsubss
  %res = call <4 x float> @llvm.x86.fma4.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma4.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_fma4_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vfnmsubsd
  %res = call <2 x double> @llvm.x86.fma4.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma4.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_x86_fma4_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vfnmsubps
  %res = call <4 x float> @llvm.x86.fma4.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma4.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_fma4_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vfnmsubpd
  %res = call <2 x double> @llvm.x86.fma4.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma4.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <8 x float> @test_x86_fma4_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK: vfnmsubps
  ; CHECK: ymm
  %res = call <8 x float> @llvm.x86.fma4.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma4.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

define <4 x double> @test_x86_fma4_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK: vfnmsubpd
  ; CHECK: ymm
  %res = call <4 x double> @llvm.x86.fma4.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma4.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; VFMADDSUB
; Packed forms only; the 256-bit tests also expect a ymm register.
define <4 x float> @test_x86_fma4_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vfmaddsubps
  %res = call <4 x float> @llvm.x86.fma4.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma4.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_fma4_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vfmaddsubpd
  %res = call <2 x double> @llvm.x86.fma4.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma4.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <8 x float> @test_x86_fma4_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK: vfmaddsubps
  ; CHECK: ymm
  %res = call <8 x float> @llvm.x86.fma4.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma4.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

define <4 x double> @test_x86_fma4_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK: vfmaddsubpd
  ; CHECK: ymm
  %res = call <4 x double> @llvm.x86.fma4.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma4.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; VFMSUBADD
; Packed forms only; the 256-bit tests also expect a ymm register.
define <4 x float> @test_x86_fma4_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vfmsubaddps
  %res = call <4 x float> @llvm.x86.fma4.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma4.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_fma4_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vfmsubaddpd
  %res = call <2 x double> @llvm.x86.fma4.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma4.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <8 x float> @test_x86_fma4_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK: vfmsubaddps
  ; CHECK: ymm
  %res = call <8 x float> @llvm.x86.fma4.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma4.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

define <4 x double> @test_x86_fma4_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK: vfmsubaddpd
  ; CHECK: ymm
  %res = call <4 x double> @llvm.x86.fma4.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma4.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

View File

@ -1,5 +1,18 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
// Assembler encoding tests for FMA4. Every instruction line is preceded by
// the expected printed form and the expected encoding bytes.
// vfmadd
// vfmaddss (opcode byte 0x6a) in three operand forms: memory as the first
// source, memory as the second source, and register sources only. The third
// encoding byte is 0xf9 when the first source is the ModRM operand (VEX.W
// set) and 0x79 when the second source is (VEX.W clear). The trailing 0x10
// byte holds the register number of %xmm1 in its high nibble.
// CHECK: vfmaddss (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6a,0x01,0x10]
vfmaddss (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfmaddss %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x6a,0x01,0x10]
vfmaddss %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
// vfmaddsd (opcode byte 0x6b), memory as the first source.
// CHECK: vfmaddsd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0x01,0x10]
vfmaddsd (%rcx), %xmm1, %xmm0, %xmm0
@ -11,3 +24,368 @@
// vfmaddsd (opcode byte 0x6b), register sources only.
// CHECK: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
// 128-bit packed forms: vfmaddps is opcode byte 0x68, vfmaddpd is 0x69.
// Each is assembled with memory as the first source, memory as the second
// source, and register sources only.
// CHECK: vfmaddps (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0x01,0x10]
vfmaddps (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfmaddps %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x68,0x01,0x10]
vfmaddps %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: vfmaddpd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x69,0x01,0x10]
vfmaddpd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfmaddpd %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x69,0x01,0x10]
vfmaddpd %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
// 256-bit packed forms: same opcode bytes; the third encoding byte becomes
// 0xfd/0x7d instead of 0xf9/0x79 (VEX.L set for ymm operands).
// CHECK: vfmaddps (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x68,0x01,0x10]
vfmaddps (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: vfmaddps %ymm1, (%rcx), %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0x7d,0x68,0x01,0x10]
vfmaddps %ymm1, (%rcx),%ymm0, %ymm0
// CHECK: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: vfmaddpd (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x69,0x01,0x10]
vfmaddpd (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: vfmaddpd %ymm1, (%rcx), %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0x7d,0x69,0x01,0x10]
vfmaddpd %ymm1, (%rcx),%ymm0, %ymm0
// CHECK: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
// vfmsub
// Scalar forms: vfmsubss is opcode byte 0x6e, vfmsubsd is 0x6f. Each mnemonic
// is assembled with memory as the first source, memory as the second source,
// and register sources only. The third encoding byte is 0xf9 when the first
// source is the ModRM operand (VEX.W set) and 0x79 when the second one is.
// CHECK: vfmsubss (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6e,0x01,0x10]
vfmsubss (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfmsubss %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x6e,0x01,0x10]
vfmsubss %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6e,0xc2,0x10]
vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: vfmsubsd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6f,0x01,0x10]
vfmsubsd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfmsubsd %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x6f,0x01,0x10]
vfmsubsd %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6f,0xc2,0x10]
vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
// 128-bit packed forms: vfmsubps is opcode byte 0x6c, vfmsubpd is 0x6d.
// CHECK: vfmsubps (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6c,0x01,0x10]
vfmsubps (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfmsubps %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x6c,0x01,0x10]
vfmsubps %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: vfmsubpd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6d,0x01,0x10]
vfmsubpd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfmsubpd %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x6d,0x01,0x10]
vfmsubpd %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
// 256-bit packed forms: same opcode bytes; the third encoding byte becomes
// 0xfd/0x7d instead of 0xf9/0x79 (VEX.L set for ymm operands).
// CHECK: vfmsubps (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x6c,0x01,0x10]
vfmsubps (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: vfmsubps %ymm1, (%rcx), %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0x7d,0x6c,0x01,0x10]
vfmsubps %ymm1, (%rcx),%ymm0, %ymm0
// CHECK: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: vfmsubpd (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x6d,0x01,0x10]
vfmsubpd (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: vfmsubpd %ymm1, (%rcx), %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0x7d,0x6d,0x01,0x10]
vfmsubpd %ymm1, (%rcx),%ymm0, %ymm0
// CHECK: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
// vfnmadd
// Scalar forms: vfnmaddss is opcode byte 0x7a, vfnmaddsd is 0x7b. Each
// mnemonic is assembled with memory as the first source, memory as the second
// source, and register sources only. The third encoding byte is 0xf9 when the
// first source is the ModRM operand (VEX.W set) and 0x79 when the second is.
// CHECK: vfnmaddss (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7a,0x01,0x10]
vfnmaddss (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfnmaddss %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x7a,0x01,0x10]
vfnmaddss %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7a,0xc2,0x10]
vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: vfnmaddsd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7b,0x01,0x10]
vfnmaddsd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfnmaddsd %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x7b,0x01,0x10]
vfnmaddsd %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7b,0xc2,0x10]
vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
// 128-bit packed forms: vfnmaddps is opcode byte 0x78, vfnmaddpd is 0x79.
// CHECK: vfnmaddps (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x78,0x01,0x10]
vfnmaddps (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfnmaddps %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x78,0x01,0x10]
vfnmaddps %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: vfnmaddpd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x79,0x01,0x10]
vfnmaddpd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfnmaddpd %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x79,0x01,0x10]
vfnmaddpd %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
// 256-bit packed forms: same opcode bytes; the third encoding byte becomes
// 0xfd/0x7d instead of 0xf9/0x79 (VEX.L set for ymm operands).
// CHECK: vfnmaddps (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x78,0x01,0x10]
vfnmaddps (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: vfnmaddps %ymm1, (%rcx), %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0x7d,0x78,0x01,0x10]
vfnmaddps %ymm1, (%rcx),%ymm0, %ymm0
// CHECK: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: vfnmaddpd (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x79,0x01,0x10]
vfnmaddpd (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: vfnmaddpd %ymm1, (%rcx), %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0x7d,0x79,0x01,0x10]
vfnmaddpd %ymm1, (%rcx),%ymm0, %ymm0
// CHECK: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
// vfnmsub
// Scalar forms: vfnmsubss is opcode byte 0x7e, vfnmsubsd is 0x7f. Each
// mnemonic is assembled with memory as the first source, memory as the second
// source, and register sources only. The third encoding byte is 0xf9 when the
// first source is the ModRM operand (VEX.W set) and 0x79 when the second is.
// CHECK: vfnmsubss (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7e,0x01,0x10]
vfnmsubss (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfnmsubss %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x7e,0x01,0x10]
vfnmsubss %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7e,0xc2,0x10]
vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: vfnmsubsd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7f,0x01,0x10]
vfnmsubsd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfnmsubsd %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x7f,0x01,0x10]
vfnmsubsd %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7f,0xc2,0x10]
vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
// 128-bit packed forms: vfnmsubps is opcode byte 0x7c, vfnmsubpd is 0x7d.
// CHECK: vfnmsubps (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7c,0x01,0x10]
vfnmsubps (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfnmsubps %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x7c,0x01,0x10]
vfnmsubps %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: vfnmsubpd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7d,0x01,0x10]
vfnmsubpd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfnmsubpd %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x7d,0x01,0x10]
vfnmsubpd %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
// 256-bit packed forms: same opcode bytes; the third encoding byte becomes
// 0xfd/0x7d instead of 0xf9/0x79 (VEX.L set for ymm operands).
// CHECK: vfnmsubps (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x7c,0x01,0x10]
vfnmsubps (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: vfnmsubps %ymm1, (%rcx), %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0x7d,0x7c,0x01,0x10]
vfnmsubps %ymm1, (%rcx),%ymm0, %ymm0
// CHECK: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: vfnmsubpd (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x7d,0x01,0x10]
vfnmsubpd (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: vfnmsubpd %ymm1, (%rcx), %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0x7d,0x7d,0x01,0x10]
vfnmsubpd %ymm1, (%rcx),%ymm0, %ymm0
// CHECK: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
// vfmaddsub
// Packed forms only: vfmaddsubps is opcode byte 0x5c, vfmaddsubpd is 0x5d.
// Each mnemonic is assembled with memory as the first source, memory as the
// second source, and register sources only. The third encoding byte is 0xf9
// when the first source is the ModRM operand (VEX.W set) and 0x79 when the
// second one is.
// CHECK: vfmaddsubps (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5c,0x01,0x10]
vfmaddsubps (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfmaddsubps %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x5c,0x01,0x10]
vfmaddsubps %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: vfmaddsubpd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5d,0x01,0x10]
vfmaddsubpd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfmaddsubpd %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x5d,0x01,0x10]
vfmaddsubpd %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
// 256-bit packed forms: same opcode bytes; the third encoding byte becomes
// 0xfd/0x7d instead of 0xf9/0x79 (VEX.L set for ymm operands).
// CHECK: vfmaddsubps (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5c,0x01,0x10]
vfmaddsubps (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: vfmaddsubps %ymm1, (%rcx), %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0x7d,0x5c,0x01,0x10]
vfmaddsubps %ymm1, (%rcx),%ymm0, %ymm0
// CHECK: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: vfmaddsubpd (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5d,0x01,0x10]
vfmaddsubpd (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: vfmaddsubpd %ymm1, (%rcx), %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0x7d,0x5d,0x01,0x10]
vfmaddsubpd %ymm1, (%rcx),%ymm0, %ymm0
// CHECK: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
// vfmsubadd
// Packed forms only: vfmsubaddps is opcode byte 0x5e, vfmsubaddpd is 0x5f.
// Each mnemonic is assembled with memory as the first source, memory as the
// second source, and register sources only. The third encoding byte is 0xf9
// when the first source is the ModRM operand (VEX.W set) and 0x79 when the
// second one is.
// CHECK: vfmsubaddps (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5e,0x01,0x10]
vfmsubaddps (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfmsubaddps %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x5e,0x01,0x10]
vfmsubaddps %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: vfmsubaddpd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5f,0x01,0x10]
vfmsubaddpd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: vfmsubaddpd %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x5f,0x01,0x10]
vfmsubaddpd %xmm1, (%rcx),%xmm0, %xmm0
// CHECK: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
// 256-bit packed forms: same opcode bytes; the third encoding byte becomes
// 0xfd/0x7d instead of 0xf9/0x79 (VEX.L set for ymm operands).
// CHECK: vfmsubaddps (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5e,0x01,0x10]
vfmsubaddps (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: vfmsubaddps %ymm1, (%rcx), %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0x7d,0x5e,0x01,0x10]
vfmsubaddps %ymm1, (%rcx),%ymm0, %ymm0
// CHECK: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: vfmsubaddpd (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5f,0x01,0x10]
vfmsubaddpd (%rcx), %ymm1, %ymm0, %ymm0
// CHECK: vfmsubaddpd %ymm1, (%rcx), %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0x7d,0x5f,0x01,0x10]
vfmsubaddpd %ymm1, (%rcx),%ymm0, %ymm0
// CHECK: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0