mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
[AVX512] Add masking variant for the FMA instructions
This change further evolves the base class AVX512_masking in order to make it suitable for the masking variants of the FMA instructions. Besides AVX512_masking there is now a new base class that instructions including FMAs can use: AVX512_masking_3src. With three-source (destructive) instructions one of the sources is already tied to the destination. This difference from AVX512_masking is captured by this new class. The common bits between _masking and _masking_3src are broken out into a new super class called AVX512_masking_common. As with valign, there is some corresponding restructuring of the underlying format classes. The idea is the same: we essentially want to derive from two classes, one providing the format bits and another format-independent multiclass supplying the various masking and non-masking instruction variants. Existing fma tests in avx512-fma*.ll provide coverage here for the non-masking variants. For masking, the next patches in the series will add intrinsics and intrinsic tests. For AVX512_masking_3src to work, the (ins ...) dag has to be passed *without* the leading source operand that is tied to dst ($src1). This is necessary to properly construct the (ins ...) for the different variants. For the record, I did check that if $src1 is mistakenly included, you do get a fairly intuitive error message from the tablegen backend. Part of <rdar://problem/17688758> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215660 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
14bc045838
commit
265d201e19
@ -1,25 +1,28 @@
|
||||
multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
|
||||
// Common base class of AVX512_masking and AVX512_masking_3src.
|
||||
multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
|
||||
dag MaskingIns, dag ZeroMaskingIns,
|
||||
string OpcodeStr,
|
||||
string AttSrcAsm, string IntelSrcAsm,
|
||||
dag RHS, ValueType OpVT,
|
||||
RegisterClass RC, RegisterClass KRC> {
|
||||
dag RHS, dag MaskingRHS, ValueType OpVT,
|
||||
RegisterClass RC, RegisterClass KRC,
|
||||
string MaskingConstraint = ""> {
|
||||
def NAME: AVX512<O, F, Outs, Ins,
|
||||
OpcodeStr#" \t{"#AttSrcAsm#", $dst|"#
|
||||
"$dst, "#IntelSrcAsm#"}",
|
||||
[(set RC:$dst, RHS)]>;
|
||||
|
||||
// Prefer over VMOV*rrk Pat<>
|
||||
let Constraints = "$src0 = $dst", AddedComplexity = 20 in
|
||||
def NAME#k: AVX512<O, F, Outs,
|
||||
!con((ins RC:$src0, KRC:$mask), Ins),
|
||||
let AddedComplexity = 20 in
|
||||
def NAME#k: AVX512<O, F, Outs, MaskingIns,
|
||||
OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}}|"#
|
||||
"$dst {${mask}}, "#IntelSrcAsm#"}",
|
||||
[(set RC:$dst,
|
||||
(vselect KRC:$mask, RHS, RC:$src0))]>,
|
||||
EVEX_K;
|
||||
[(set RC:$dst, MaskingRHS)]>,
|
||||
EVEX_K {
|
||||
// In case of the 3src subclass this is overridden with a let.
|
||||
string Constraints = MaskingConstraint;
|
||||
}
|
||||
let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
|
||||
def NAME#kz: AVX512<O, F, Outs,
|
||||
!con((ins KRC:$mask), Ins),
|
||||
def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
|
||||
OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
|
||||
"$dst {${mask}} {z}, "#IntelSrcAsm#"}",
|
||||
[(set RC:$dst,
|
||||
@ -29,6 +32,40 @@ multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
|
||||
EVEX_KZ;
|
||||
}
|
||||
|
||||
// This multiclass generates the unconditional/non-masking, the masking and
|
||||
// the zero-masking variant of the instruction. In the masking case, the
|
||||
// preserved vector elements come from a new dummy input operand tied to $dst.
//
// It is a thin wrapper that forwards to AVX512_masking_common with:
//   - Ins unchanged for the non-masking variant,
//   - (ins RC:$src0, KRC:$mask) prepended to Ins for the masking variant,
//   - (ins KRC:$mask) prepended to Ins for the zero-masking variant,
//   - (vselect KRC:$mask, RHS, RC:$src0) as the masking pattern, and
//   - "$src0 = $dst" as the constraint applied to the masking variant.
|
||||
multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
|
||||
string OpcodeStr,
|
||||
string AttSrcAsm, string IntelSrcAsm,
|
||||
dag RHS, ValueType OpVT, RegisterClass RC,
|
||||
RegisterClass KRC> :
|
||||
AVX512_masking_common<O, F, Outs,
|
||||
Ins,
|
||||
!con((ins RC:$src0, KRC:$mask), Ins),
|
||||
!con((ins KRC:$mask), Ins),
|
||||
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
|
||||
(vselect KRC:$mask, RHS, RC:$src0), OpVT, RC, KRC,
|
||||
"$src0 = $dst">;
|
||||
|
||||
// Similar to AVX512_masking but in this case one of the source operands
|
||||
// ($src1) is already tied to $dst so we just use that for the preserved
|
||||
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
|
||||
// $src1.
//
// The ins dags handed to AVX512_masking_common are built here:
//   - non-masking:           $src1 followed by NonTiedIns,
//   - masking, zero-masking: $src1, then $mask, then NonTiedIns
//     (the same dag is passed for both).
// The masking pattern selects between RHS and $src1 under $mask.
// No MaskingConstraint argument is passed, so the common class's default
// ("") applies; the "$src1 = $dst" tie is expected to be supplied by the
// instantiating code through a let (NOTE(review): confirm at each use site).
|
||||
multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
|
||||
string OpcodeStr,
|
||||
string AttSrcAsm, string IntelSrcAsm,
|
||||
dag RHS, ValueType OpVT,
|
||||
RegisterClass RC, RegisterClass KRC> :
|
||||
AVX512_masking_common<O, F, Outs,
|
||||
!con((ins RC:$src1), NonTiedIns),
|
||||
!con((ins RC:$src1), !con((ins KRC:$mask),
|
||||
NonTiedIns)),
|
||||
!con((ins RC:$src1), !con((ins KRC:$mask),
|
||||
NonTiedIns)),
|
||||
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
|
||||
(vselect KRC:$mask, RHS, RC:$src1), OpVT, RC, KRC>;
|
||||
|
||||
// Bitcasts between 512-bit vector types. Return the original type since
|
||||
// no instruction is needed for the conversion
|
||||
let Predicates = [HasAVX512] in {
|
||||
@ -2955,11 +2992,13 @@ let Constraints = "$src1 = $dst" in {
|
||||
multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
|
||||
RegisterClass RC, X86MemOperand x86memop,
|
||||
PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
|
||||
string BrdcstStr, SDNode OpNode, ValueType OpVT> {
|
||||
def r: AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2, RC:$src3),
|
||||
!strconcat(OpcodeStr," \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set RC:$dst, (OpVT(OpNode RC:$src1, RC:$src2, RC:$src3)))]>;
|
||||
string BrdcstStr, SDNode OpNode, ValueType OpVT,
|
||||
RegisterClass KRC> {
|
||||
defm r: AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src2, RC:$src3),
|
||||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)), OpVT, RC, KRC>,
|
||||
AVX512FMA3Base;
|
||||
|
||||
let mayLoad = 1 in
|
||||
def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
|
||||
@ -2979,53 +3018,53 @@ multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
|
||||
memopv16f32, f32mem, loadf32, "{1to16}",
|
||||
X86Fmadd, v16f32>, EVEX_V512,
|
||||
X86Fmadd, v16f32, VK16WM>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
|
||||
memopv16f32, f32mem, loadf32, "{1to16}",
|
||||
X86Fmsub, v16f32>, EVEX_V512,
|
||||
X86Fmsub, v16f32, VK16WM>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
|
||||
memopv16f32, f32mem, loadf32, "{1to16}",
|
||||
X86Fmaddsub, v16f32>,
|
||||
X86Fmaddsub, v16f32, VK16WM>,
|
||||
EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
|
||||
memopv16f32, f32mem, loadf32, "{1to16}",
|
||||
X86Fmsubadd, v16f32>,
|
||||
X86Fmsubadd, v16f32, VK16WM>,
|
||||
EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
|
||||
memopv16f32, f32mem, loadf32, "{1to16}",
|
||||
X86Fnmadd, v16f32>, EVEX_V512,
|
||||
X86Fnmadd, v16f32, VK16WM>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
|
||||
memopv16f32, f32mem, loadf32, "{1to16}",
|
||||
X86Fnmsub, v16f32>, EVEX_V512,
|
||||
X86Fnmsub, v16f32, VK16WM>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
}
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
|
||||
memopv8f64, f64mem, loadf64, "{1to8}",
|
||||
X86Fmadd, v8f64>, EVEX_V512,
|
||||
X86Fmadd, v8f64, VK8WM>, EVEX_V512,
|
||||
VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
|
||||
memopv8f64, f64mem, loadf64, "{1to8}",
|
||||
X86Fmsub, v8f64>, EVEX_V512, VEX_W,
|
||||
X86Fmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
|
||||
memopv8f64, f64mem, loadf64, "{1to8}",
|
||||
X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
X86Fmaddsub, v8f64, VK8WM>,
|
||||
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
|
||||
memopv8f64, f64mem, loadf64, "{1to8}",
|
||||
X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
X86Fmsubadd, v8f64, VK8WM>,
|
||||
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
|
||||
memopv8f64, f64mem, loadf64, "{1to8}",
|
||||
X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
|
||||
X86Fnmadd, v8f64, VK8WM>, EVEX_V512, VEX_W,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
|
||||
memopv8f64, f64mem, loadf64, "{1to8}",
|
||||
X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
|
||||
X86Fnmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
|
||||
|
@ -722,7 +722,7 @@ class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, InstrItinClass itin = NoItinerary>
|
||||
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
|
||||
Requires<[HasAVX512]>;
|
||||
class AVX512AIi8Base: TAPD {
|
||||
class AVX512AIi8Base : TAPD {
|
||||
Domain ExeDomain = SSEPackedInt;
|
||||
ImmType ImmT = Imm8;
|
||||
}
|
||||
@ -748,6 +748,7 @@ class AVX512FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag>pattern, InstrItinClass itin = NoItinerary>
|
||||
: I<o, F, outs, ins, asm, pattern, itin>, T8PD,
|
||||
EVEX_4V, Requires<[HasAVX512]>;
|
||||
class AVX512FMA3Base : T8PD, EVEX_4V;
|
||||
|
||||
class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag>pattern, InstrItinClass itin = NoItinerary>
|
||||
|
Loading…
Reference in New Issue
Block a user