mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-26 21:32:10 +00:00
Replace X86 FMA intrinsic pseudo-instructions with def pats.
It looks like these pseudos were only used for pattern matching. Def pats are the appropriate way to do that. As a bonus, these intrinsics will now have memory operands folded properly, and better FMA3 variants selected where appropriate (see r199933). <rdar://problem/15611947> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200577 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
093b0413fe
commit
f96f832a3c
@ -125,6 +125,7 @@ multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set RC:$dst,
|
||||
(OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
|
||||
|
||||
let mayLoad = 1 in
|
||||
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2, x86memop:$src3),
|
||||
@ -134,30 +135,10 @@ multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
|
||||
(OpVT (OpNode RC:$src2, RC:$src1,
|
||||
(mem_frag addr:$src3))))]>;
|
||||
}
|
||||
|
||||
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop,
|
||||
ComplexPattern mem_cpat, Intrinsic IntId,
|
||||
RegisterClass RC> {
|
||||
let isCodeGenOnly = 1 in {
|
||||
let isCommutable = 1 in
|
||||
def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, VR128:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst, (IntId VR128:$src2, VR128:$src1,
|
||||
VR128:$src3))]>;
|
||||
def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, memop:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(IntId VR128:$src2, VR128:$src1, mem_cpat:$src3))]>;
|
||||
} // isCodeGenOnly
|
||||
}
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
|
||||
string OpStr, string PackTy, Intrinsic Int,
|
||||
string OpStr, string PackTy, string PT2, Intrinsic Int,
|
||||
SDNode OpNode, RegisterClass RC, ValueType OpVT,
|
||||
X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
|
||||
ComplexPattern mem_cpat> {
|
||||
@ -169,18 +150,32 @@ let neverHasSideEffects = 1 in {
|
||||
}
|
||||
|
||||
defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
|
||||
x86memop, RC, OpVT, mem_frag, OpNode>,
|
||||
fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
|
||||
memop, mem_cpat, Int, RC>;
|
||||
x86memop, RC, OpVT, mem_frag, OpNode>;
|
||||
}
|
||||
|
||||
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
|
||||
string OpStr, Intrinsic IntF32, Intrinsic IntF64,
|
||||
SDNode OpNode> {
|
||||
defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", IntF32, OpNode,
|
||||
defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", IntF32, OpNode,
|
||||
FR32, f32, f32mem, ssmem, loadf32, sse_load_f32>;
|
||||
defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", IntF64, OpNode,
|
||||
defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "PD", IntF64, OpNode,
|
||||
FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W;
|
||||
|
||||
def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
|
||||
(COPY_TO_REGCLASS
|
||||
(!cast<Instruction>(NAME#"SSr213r")
|
||||
(COPY_TO_REGCLASS $src2, FR32),
|
||||
(COPY_TO_REGCLASS $src1, FR32),
|
||||
(COPY_TO_REGCLASS $src3, FR32)),
|
||||
VR128)>;
|
||||
|
||||
def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
|
||||
(COPY_TO_REGCLASS
|
||||
(!cast<Instruction>(NAME#"SDr213r")
|
||||
(COPY_TO_REGCLASS $src2, FR64),
|
||||
(COPY_TO_REGCLASS $src1, FR64),
|
||||
(COPY_TO_REGCLASS $src3, FR64)),
|
||||
VR128)>;
|
||||
}
|
||||
|
||||
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
|
||||
|
@ -1274,8 +1274,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||
{ X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 },
|
||||
{ X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 },
|
||||
{ X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 },
|
||||
{ X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, 0 },
|
||||
{ X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, 0 },
|
||||
|
||||
{ X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 },
|
||||
{ X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 },
|
||||
@ -1296,8 +1294,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||
{ X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 },
|
||||
{ X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 },
|
||||
{ X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 },
|
||||
{ X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, 0 },
|
||||
{ X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, 0 },
|
||||
|
||||
{ X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 },
|
||||
{ X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 },
|
||||
@ -1318,8 +1314,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||
{ X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 },
|
||||
{ X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 },
|
||||
{ X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 },
|
||||
{ X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, 0 },
|
||||
{ X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, 0 },
|
||||
|
||||
{ X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 },
|
||||
{ X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 },
|
||||
@ -1340,8 +1334,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||
{ X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 },
|
||||
{ X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 },
|
||||
{ X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 },
|
||||
{ X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, 0 },
|
||||
{ X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, 0 },
|
||||
|
||||
{ X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 },
|
||||
{ X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 },
|
||||
|
@ -3,7 +3,7 @@
|
||||
; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s
|
||||
|
||||
define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
|
||||
; CHECK: fmadd213ss %xmm
|
||||
; CHECK: fmadd213ss (%r8), %xmm
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
|
||||
ret <4 x float> %res
|
||||
}
|
||||
@ -24,7 +24,7 @@ define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x f
|
||||
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
|
||||
|
||||
define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
|
||||
; CHECK: fnmadd213ss %xmm
|
||||
; CHECK: fnmadd213ss (%r8), %xmm
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user