mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-10 20:33:15 +00:00
Fast-math fold: x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
The motivation is to recognize code such as this from /llvm/projects/test-suite/SingleSource/Benchmarks/BenchmarkGame/n-body.c:

    float distance = sqrt(dx * dx + dy * dy + dz * dz);
    float mag = dt / (distance * distance * distance);

Without this patch, we don't match the sqrt as a reciprocal sqrt, so for PPC the new testcase in this patch produces:

        addis 3, 2, .LCPI4_2@toc@ha
        lfs 4, .LCPI4_2@toc@l(3)
        addis 3, 2, .LCPI4_1@toc@ha
        lfs 0, .LCPI4_1@toc@l(3)
        fcmpu 0, 1, 4
        beq 0, .LBB4_2
    # BB#1:
        frsqrtes 4, 1
        addis 3, 2, .LCPI4_0@toc@ha
        lfs 5, .LCPI4_0@toc@l(3)
        fnmsubs 13, 1, 5, 1
        fmuls 6, 4, 4
        fmadds 1, 13, 6, 5
        fmuls 1, 4, 1
        fres 4, 1              <--- reciprocal of reciprocal square root
        fnmsubs 1, 1, 4, 0
        fmadds 4, 4, 1, 4
    .LBB4_2:
        fmuls 1, 4, 2
        fres 2, 1
        fnmsubs 0, 1, 2, 0
        fmadds 0, 2, 0, 2
        fmuls 1, 3, 0
        blr

After the patch, this simplifies to:

        frsqrtes 0, 1
        addis 3, 2, .LCPI4_1@toc@ha
        fres 5, 2
        lfs 4, .LCPI4_1@toc@l(3)
        addis 3, 2, .LCPI4_0@toc@ha
        lfs 7, .LCPI4_0@toc@l(3)
        fnmsubs 13, 1, 4, 1
        fmuls 6, 0, 0
        fnmsubs 2, 2, 5, 7
        fmadds 1, 13, 6, 4
        fmadds 2, 5, 2, 5
        fmuls 0, 0, 1
        fmuls 0, 0, 2
        fmuls 1, 3, 0
        blr

Differential Revision: http://reviews.llvm.org/D5628

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219139 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b9ce60ff09
commit
b67100314b
@ -7036,6 +7036,28 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
|
|||||||
AddToWorklist(RV.getNode());
|
AddToWorklist(RV.getNode());
|
||||||
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
|
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
|
||||||
}
|
}
|
||||||
|
} else if (N1.getOpcode() == ISD::FMUL) {
|
||||||
|
// Look through an FMUL. Even though this won't remove the FDIV directly,
|
||||||
|
// it's still worthwhile to get rid of the FSQRT if possible.
|
||||||
|
SDValue SqrtOp;
|
||||||
|
SDValue OtherOp;
|
||||||
|
if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
|
||||||
|
SqrtOp = N1.getOperand(0);
|
||||||
|
OtherOp = N1.getOperand(1);
|
||||||
|
} else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
|
||||||
|
SqrtOp = N1.getOperand(1);
|
||||||
|
OtherOp = N1.getOperand(0);
|
||||||
|
}
|
||||||
|
if (SqrtOp.getNode()) {
|
||||||
|
// We found a FSQRT, so try to make this fold:
|
||||||
|
// x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
|
||||||
|
if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
|
||||||
|
AddToWorklist(RV.getNode());
|
||||||
|
RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
|
||||||
|
AddToWorklist(RV.getNode());
|
||||||
|
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fold into a reciprocal estimate and multiply instead of a real divide.
|
// Fold into a reciprocal estimate and multiply instead of a real divide.
|
||||||
|
@ -96,6 +96,34 @@ define float @goo(float %a, float %b) nounwind {
|
|||||||
; CHECK-SAFE: blr
|
; CHECK-SAFE: blr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Recognize that this is rsqrt(a) * rcp(b) * c,
|
||||||
|
; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
|
||||||
|
define float @rsqrt_fmul(float %a, float %b, float %c) {
|
||||||
|
%x = call float @llvm.sqrt.f32(float %a)
|
||||||
|
%y = fmul float %x, %b
|
||||||
|
%z = fdiv float %c, %y
|
||||||
|
ret float %z
|
||||||
|
|
||||||
|
; CHECK: @rsqrt_fmul
|
||||||
|
; CHECK-DAG: frsqrtes
|
||||||
|
; CHECK-DAG: fres
|
||||||
|
; CHECK-DAG: fnmsubs
|
||||||
|
; CHECK-DAG: fmuls
|
||||||
|
; CHECK-DAG: fnmsubs
|
||||||
|
; CHECK-DAG: fmadds
|
||||||
|
; CHECK-DAG: fmadds
|
||||||
|
; CHECK: fmuls
|
||||||
|
; CHECK-NEXT: fmuls
|
||||||
|
; CHECK-NEXT: fmuls
|
||||||
|
; CHECK-NEXT: blr
|
||||||
|
|
||||||
|
; CHECK-SAFE: @rsqrt_fmul
|
||||||
|
; CHECK-SAFE: fsqrts
|
||||||
|
; CHECK-SAFE: fmuls
|
||||||
|
; CHECK-SAFE: fdivs
|
||||||
|
; CHECK-SAFE: blr
|
||||||
|
}
|
||||||
|
|
||||||
define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
|
define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
|
||||||
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
|
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
|
||||||
%r = fdiv <4 x float> %a, %x
|
%r = fdiv <4 x float> %a, %x
|
||||||
|
Loading…
x
Reference in New Issue
Block a user