Fast-math fold: x / (y * sqrt(z)) -> x * (rsqrt(z) / y)

The motivation is to recognize code such as this from /llvm/projects/test-suite/SingleSource/Benchmarks/BenchmarkGame/n-body.c:

float distance = sqrt(dx * dx + dy * dy + dz * dz);
float mag = dt / (distance * distance * distance);

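Without this patch, the sqrt is not folded into a reciprocal square root estimate: it is computed as the reciprocal of an rsqrt estimate, and the divide then takes yet another reciprocal. For PPC, the new test case in this patch produces: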

   addis 3, 2, .LCPI4_2@toc@ha
   lfs 4, .LCPI4_2@toc@l(3)
   addis 3, 2, .LCPI4_1@toc@ha
   lfs 0, .LCPI4_1@toc@l(3)
   fcmpu 0, 1, 4
   beq 0, .LBB4_2
# BB#1:
   frsqrtes 4, 1
   addis 3, 2, .LCPI4_0@toc@ha
   lfs 5, .LCPI4_0@toc@l(3)
   fnmsubs 13, 1, 5, 1
   fmuls 6, 4, 4
   fmadds 1, 13, 6, 5
   fmuls 1, 4, 1
   fres 4, 1                <--- reciprocal of reciprocal square root
   fnmsubs 1, 1, 4, 0
   fmadds 4, 4, 1, 4
.LBB4_2:
   fmuls 1, 4, 2
   fres 2, 1
   fnmsubs 0, 1, 2, 0
   fmadds 0, 2, 0, 2
   fmuls 1, 3, 0
   blr

After the patch, this simplifies to:

frsqrtes 0, 1
addis 3, 2, .LCPI4_1@toc@ha
fres 5, 2
lfs 4, .LCPI4_1@toc@l(3)
addis 3, 2, .LCPI4_0@toc@ha
lfs 7, .LCPI4_0@toc@l(3)
fnmsubs 13, 1, 4, 1
fmuls 6, 0, 0
fnmsubs 2, 2, 5, 7
fmadds 1, 13, 6, 4
fmadds 2, 5, 2, 5
fmuls 0, 0, 1
fmuls 0, 0, 2
fmuls 1, 3, 0
blr

Differential Revision: http://reviews.llvm.org/D5628



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219139 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Sanjay Patel 2014-10-06 19:31:18 +00:00
parent b9ce60ff09
commit b67100314b
2 changed files with 50 additions and 0 deletions

View File

@ -7036,6 +7036,28 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
AddToWorklist(RV.getNode()); AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
} }
} else if (N1.getOpcode() == ISD::FMUL) {
// Look through an FMUL. Even though this won't remove the FDIV directly,
// it's still worthwhile to get rid of the FSQRT if possible.
SDValue SqrtOp;
SDValue OtherOp;
if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
SqrtOp = N1.getOperand(0);
OtherOp = N1.getOperand(1);
} else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
SqrtOp = N1.getOperand(1);
OtherOp = N1.getOperand(0);
}
if (SqrtOp.getNode()) {
// We found a FSQRT, so try to make this fold:
// x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
}
} }
// Fold into a reciprocal estimate and multiply instead of a real divide. // Fold into a reciprocal estimate and multiply instead of a real divide.

View File

@ -96,6 +96,34 @@ define float @goo(float %a, float %b) nounwind {
; CHECK-SAFE: blr ; CHECK-SAFE: blr
} }
; Test for the fold x / (y * sqrt(z)) -> x * (rsqrt(z) / y).
; The function computes c / (sqrt(a) * b).
; Recognize that this is rsqrt(a) * rcp(b) * c,
; not 1 / (1 / sqrt(a)) * rcp(b) * c, i.e. do not take the reciprocal of a
; reciprocal square root estimate just to rebuild sqrt(a).
define float @rsqrt_fmul(float %a, float %b, float %c) {
; %x = sqrt(a)
%x = call float @llvm.sqrt.f32(float %a)
; %y = sqrt(a) * b -- the divisor is an fmul with an fsqrt operand.
%y = fmul float %x, %b
; %z = c / (sqrt(a) * b) -- the fdiv whose divisor should be looked through.
%z = fdiv float %c, %y
ret float %z
; Default-prefix expectations: one reciprocal-sqrt estimate (frsqrtes) and one
; reciprocal estimate (fres), plus their refinement arithmetic. The -DAG lines
; may match in any order relative to each other.
; CHECK: @rsqrt_fmul
; CHECK-DAG: frsqrtes
; CHECK-DAG: fres
; CHECK-DAG: fnmsubs
; CHECK-DAG: fmuls
; CHECK-DAG: fnmsubs
; CHECK-DAG: fmadds
; CHECK-DAG: fmadds
; The function must end with three consecutive multiplies and the return.
; CHECK: fmuls
; CHECK-NEXT: fmuls
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr
; SAFE-prefix expectations: a real square root, multiply and divide, with no
; estimate instructions. Presumably this prefix belongs to a run without
; unsafe FP math -- the RUN lines are not visible here; confirm at file top.
; CHECK-SAFE: @rsqrt_fmul
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fmuls
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}
define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind { define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
%r = fdiv <4 x float> %a, %x %r = fdiv <4 x float> %a, %x