Make the PPC fast-math sqrt expansion safe at 0

In fast-math mode, sqrt(x) is calculated as the fast reciprocal of the fast
reciprocal-sqrt expansion, i.e. sqrt(x) = 1 / (1 / sqrt(x)). Both the reciprocal
and the reciprocal-sqrt expansions use the associated estimate instructions
along with some Newton iterations. Unfortunately, as a result, sqrt(0) was being
calculated as NaN, which is not correct. Now we explicitly return a result of
zero if the input is zero.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190624 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2013-09-12 19:04:12 +00:00
parent ba7183bc52
commit 4a1535c038
2 changed files with 25 additions and 2 deletions

View File

@ -7021,9 +7021,29 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (RV.getNode() != 0) { if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode()); DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI); RV = DAGCombineFastRecip(RV, DCI);
if (RV.getNode() != 0) if (RV.getNode() != 0) {
// Unfortunately, RV is now NaN if the input was exactly 0. Select out
// this case and force the answer to 0.
EVT VT = RV.getValueType();
SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
if (VT.isVector()) {
assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
}
SDValue ZeroCmp =
DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
N->getOperand(0), Zero, ISD::SETEQ);
DCI.AddToWorklist(ZeroCmp.getNode());
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
ZeroCmp, Zero, RV);
return RV; return RV;
} }
}
} }
break; break;

View File

@ -169,6 +169,7 @@ entry:
ret double %r ret double %r
; CHECK: @foo3 ; CHECK: @foo3
; CHECK: fcmpu
; CHECK-DAG: frsqrte ; CHECK-DAG: frsqrte
; CHECK-DAG: fnmsub ; CHECK-DAG: fnmsub
; CHECK: fmul ; CHECK: fmul
@ -195,6 +196,7 @@ entry:
ret float %r ret float %r
; CHECK: @goo3 ; CHECK: @goo3
; CHECK: fcmpu
; CHECK-DAG: frsqrtes ; CHECK-DAG: frsqrtes
; CHECK-DAG: fnmsubs ; CHECK-DAG: fnmsubs
; CHECK: fmuls ; CHECK: fmuls
@ -217,7 +219,8 @@ entry:
; CHECK: @hoo3 ; CHECK: @hoo3
; CHECK: vrsqrtefp ; CHECK: vrsqrtefp
; CHECK: vrefp ; CHECK-DAG: vrefp
; CHECK-DAG: vcmpeqfp
; CHECK-SAFE: @hoo3 ; CHECK-SAFE: @hoo3
; CHECK-SAFE-NOT: vrsqrtefp ; CHECK-SAFE-NOT: vrsqrtefp