mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-21 19:32:16 +00:00
Add more fused mul+add/sub patterns. rdar://10139676
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154484 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e611378a6e
commit
14b4c03580
@ -4133,12 +4133,18 @@ def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
|
|||||||
Requires<[HasVFP4,UseFusedMAC]>;
|
Requires<[HasVFP4,UseFusedMAC]>;
|
||||||
|
|
||||||
// Match @llvm.fma.* intrinsics
|
// Match @llvm.fma.* intrinsics
|
||||||
def : Pat<(fma (v2f32 DPR:$src1), (v2f32 DPR:$Vn), (v2f32 DPR:$Vm)),
|
def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)),
|
||||||
(VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
|
(VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
|
||||||
Requires<[HasVFP4]>;
|
Requires<[HasVFP4]>;
|
||||||
def : Pat<(fma (v4f32 QPR:$src1), (v4f32 QPR:$Vn), (v4f32 QPR:$Vm)),
|
def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)),
|
||||||
(VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
|
(VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
|
||||||
Requires<[HasVFP4]>;
|
Requires<[HasVFP4]>;
|
||||||
|
def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)),
|
||||||
|
(VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
|
||||||
|
Requires<[HasVFP4]>;
|
||||||
|
def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)),
|
||||||
|
(VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
|
||||||
|
Requires<[HasVFP4]>;
|
||||||
|
|
||||||
// Vector Subtract Operations.
|
// Vector Subtract Operations.
|
||||||
|
|
||||||
|
@ -1081,10 +1081,10 @@ def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
|
|||||||
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
|
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
|
||||||
|
|
||||||
// Match @llvm.fma.* intrinsics
|
// Match @llvm.fma.* intrinsics
|
||||||
def : Pat<(fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm)),
|
def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)),
|
||||||
(VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
(VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
||||||
Requires<[HasVFP4]>;
|
Requires<[HasVFP4]>;
|
||||||
def : Pat<(fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm)),
|
def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)),
|
||||||
(VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
(VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
||||||
Requires<[HasVFP4]>;
|
Requires<[HasVFP4]>;
|
||||||
|
|
||||||
@ -1114,6 +1114,22 @@ def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
|
|||||||
(VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
|
(VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
|
||||||
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
|
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
|
||||||
|
|
||||||
|
// Match @llvm.fma.* intrinsics
|
||||||
|
// (fma (fneg x), y, z) -> (vfms x, y, z)
|
||||||
|
def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)),
|
||||||
|
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
||||||
|
Requires<[HasVFP4]>;
|
||||||
|
def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)),
|
||||||
|
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
||||||
|
Requires<[HasVFP4]>;
|
||||||
|
// (fneg (fma x, (fneg y), z)) -> (vfms x, y, z)
|
||||||
|
def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))),
|
||||||
|
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
||||||
|
Requires<[HasVFP4]>;
|
||||||
|
def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))),
|
||||||
|
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
||||||
|
Requires<[HasVFP4]>;
|
||||||
|
|
||||||
def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
|
def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
|
||||||
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
||||||
IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
|
IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
|
||||||
@ -1141,12 +1157,20 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
|
|||||||
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
|
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
|
||||||
|
|
||||||
// Match @llvm.fma.* intrinsics
|
// Match @llvm.fma.* intrinsics
|
||||||
|
// (fneg (fma x, y, z)) -> (vfnma x, y, z)
|
||||||
def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))),
|
def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))),
|
||||||
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
||||||
Requires<[HasVFP4]>;
|
Requires<[HasVFP4]>;
|
||||||
def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))),
|
def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))),
|
||||||
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
||||||
Requires<[HasVFP4]>;
|
Requires<[HasVFP4]>;
|
||||||
|
// (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z)
|
||||||
|
def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, (fneg DPR:$Dm))),
|
||||||
|
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
||||||
|
Requires<[HasVFP4]>;
|
||||||
|
def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))),
|
||||||
|
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
||||||
|
Requires<[HasVFP4]>;
|
||||||
|
|
||||||
def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
|
def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
|
||||||
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
||||||
@ -1173,6 +1197,22 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
|
|||||||
(VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
|
(VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
|
||||||
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
|
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
|
||||||
|
|
||||||
|
// Match @llvm.fma.* intrinsics
|
||||||
|
// (fneg (fma (fneg x), y, z)) -> (vfnms x, y, z)
|
||||||
|
def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))),
|
||||||
|
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
||||||
|
Requires<[HasVFP4]>;
|
||||||
|
def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))),
|
||||||
|
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
||||||
|
Requires<[HasVFP4]>;
|
||||||
|
// (fma x, (fneg y), z) -> (vfnms x, y, z)
|
||||||
|
def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)),
|
||||||
|
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
||||||
|
Requires<[HasVFP4]>;
|
||||||
|
def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)),
|
||||||
|
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
||||||
|
Requires<[HasVFP4]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// FP Conditional moves.
|
// FP Conditional moves.
|
||||||
//
|
//
|
||||||
|
@ -103,43 +103,81 @@ define float @test_fma_f32(float %a, float %b, float %c) nounwind readnone ssp {
|
|||||||
entry:
|
entry:
|
||||||
; CHECK: test_fma_f32
|
; CHECK: test_fma_f32
|
||||||
; CHECK: vfma.f32
|
; CHECK: vfma.f32
|
||||||
%call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
|
%tmp1 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
|
||||||
ret float %call
|
ret float %tmp1
|
||||||
}
|
}
|
||||||
|
|
||||||
define double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp {
|
define double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: test_fma_f64
|
; CHECK: test_fma_f64
|
||||||
; CHECK: vfma.f64
|
; CHECK: vfma.f64
|
||||||
%call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
|
%tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
|
||||||
ret double %call
|
ret double %tmp1
|
||||||
}
|
}
|
||||||
|
|
||||||
define <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
|
define <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: test_fma_v2f32
|
; CHECK: test_fma_v2f32
|
||||||
; CHECK: vfma.f32
|
; CHECK: vfma.f32
|
||||||
%0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
|
%tmp1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
|
||||||
ret <2 x float> %0
|
ret <2 x float> %tmp1
|
||||||
}
|
}
|
||||||
|
|
||||||
define float @test_fnma_f32(float %a, float %b, float %c) nounwind readnone ssp {
|
define double @test_fms_f64(double %a, double %b, double %c) nounwind readnone ssp {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: test_fnma_f32
|
; CHECK: test_fms_f64
|
||||||
; CHECK: vfnma.f32
|
; CHECK: vfms.f64
|
||||||
%call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
|
%tmp1 = fsub double -0.0, %a
|
||||||
%tmp1 = fsub float -0.0, %call
|
%tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
|
||||||
%tmp2 = fsub float %tmp1, %c
|
ret double %tmp2
|
||||||
ret float %tmp2
|
}
|
||||||
|
|
||||||
|
define double @test_fms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
|
||||||
|
entry:
|
||||||
|
; CHECK: test_fms_f64_2
|
||||||
|
; CHECK: vfms.f64
|
||||||
|
%tmp1 = fsub double -0.0, %b
|
||||||
|
%tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
|
||||||
|
%tmp3 = fsub double -0.0, %tmp2
|
||||||
|
ret double %tmp3
|
||||||
|
}
|
||||||
|
|
||||||
|
define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
|
||||||
|
entry:
|
||||||
|
; CHECK: test_fnms_f64
|
||||||
|
; CHECK: vfnms.f64
|
||||||
|
%tmp1 = fsub double -0.0, %a
|
||||||
|
%tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
|
||||||
|
%tmp3 = fsub double -0.0, %tmp2
|
||||||
|
ret double %tmp3
|
||||||
|
}
|
||||||
|
|
||||||
|
define double @test_fnms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
|
||||||
|
entry:
|
||||||
|
; CHECK: test_fnms_f64_2
|
||||||
|
; CHECK: vfnms.f64
|
||||||
|
%tmp1 = fsub double -0.0, %b
|
||||||
|
%tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
|
||||||
|
ret double %tmp2
|
||||||
}
|
}
|
||||||
|
|
||||||
define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
|
define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: test_fnma_f64
|
; CHECK: test_fnma_f64
|
||||||
; CHECK: vfnma.f64
|
; CHECK: vfnma.f64
|
||||||
%call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
|
%tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
|
||||||
%tmp = fsub double -0.0, %call
|
%tmp2 = fsub double -0.0, %tmp1
|
||||||
ret double %tmp
|
ret double %tmp2
|
||||||
|
}
|
||||||
|
|
||||||
|
define double @test_fnma_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
|
||||||
|
entry:
|
||||||
|
; CHECK: test_fnma_f64_2
|
||||||
|
; CHECK: vfnma.f64
|
||||||
|
%tmp1 = fsub double -0.0, %a
|
||||||
|
%tmp2 = fsub double -0.0, %c
|
||||||
|
%tmp3 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %tmp2) nounwind readnone
|
||||||
|
ret double %tmp3
|
||||||
}
|
}
|
||||||
|
|
||||||
declare float @llvm.fma.f32(float, float, float) nounwind readnone
|
declare float @llvm.fma.f32(float, float, float) nounwind readnone
|
||||||
|
Loading…
x
Reference in New Issue
Block a user