mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-06 06:33:24 +00:00
Split the estimate() interface into separate functions for each type. NFC.
It was hacky to use an opcode as a switch because it won't always match (rsqrte != sqrte), and it looks like we'll need to add more special casing per arch than I had hoped for. E.g., x86 will prefer a different NR estimate implementation. ARM will want to use its 'step' instructions. There also don't appear to have been any new estimate instructions in any arch in a long, long time. Altivec vloge and vexpte may have been the first and last in that field... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218698 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9952c922c2
commit
cafc85bf1e
@ -2624,21 +2624,37 @@ public:
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Hooks for building estimates in place of, for example, slower divisions
|
/// Hooks for building estimates in place of slower divisions and square
|
||||||
/// and square roots. These are not builder functions themselves, just the
|
/// roots.
|
||||||
/// target-specific variables needed for building the estimate algorithm.
|
|
||||||
|
/// Return a reciprocal square root estimate value for the input operand.
|
||||||
/// Return an estimate value for the input opcode and input operand.
|
/// The RefinementSteps output is the number of Newton-Raphson refinement
|
||||||
/// The RefinementSteps output is the number of refinement iterations
|
/// iterations required to generate a sufficient (though not necessarily
|
||||||
/// required to generate a sufficient (though not necessarily IEEE-754
|
/// IEEE-754 compliant) estimate for the value type.
|
||||||
/// compliant) estimate for the value type.
|
/// A target may choose to implement its own refinement within this function.
|
||||||
|
/// If that's true, then return '0' as the number of RefinementSteps to avoid
|
||||||
|
/// any further refinement of the estimate.
|
||||||
/// An empty SDValue return means no estimate sequence can be created.
|
/// An empty SDValue return means no estimate sequence can be created.
|
||||||
virtual SDValue getEstimate(unsigned Opcode, SDValue Operand,
|
virtual SDValue getRsqrtEstimate(SDValue Operand,
|
||||||
DAGCombinerInfo &DCI,
|
DAGCombinerInfo &DCI,
|
||||||
unsigned &RefinementSteps) const {
|
unsigned &RefinementSteps) const {
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return a reciprocal estimate value for the input operand.
|
||||||
|
/// The RefinementSteps output is the number of Newton-Raphson refinement
|
||||||
|
/// iterations required to generate a sufficient (though not necessarily
|
||||||
|
/// IEEE-754 compliant) estimate for the value type.
|
||||||
|
/// A target may choose to implement its own refinement within this function.
|
||||||
|
/// If that's true, then return '0' as the number of RefinementSteps to avoid
|
||||||
|
/// any further refinement of the estimate.
|
||||||
|
/// An empty SDValue return means no estimate sequence can be created.
|
||||||
|
virtual SDValue getRecipEstimate(SDValue Operand,
|
||||||
|
DAGCombinerInfo &DCI,
|
||||||
|
unsigned &RefinementSteps) const {
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
//===--------------------------------------------------------------------===//
|
//===--------------------------------------------------------------------===//
|
||||||
// Legalization utility functions
|
// Legalization utility functions
|
||||||
//
|
//
|
||||||
|
@ -11779,7 +11779,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
|
|||||||
TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
|
TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
|
||||||
|
|
||||||
unsigned Iterations;
|
unsigned Iterations;
|
||||||
if (SDValue Est = TLI.getEstimate(ISD::FDIV, Op, DCI, Iterations)) {
|
if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
|
||||||
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
|
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
|
||||||
// For the reciprocal, we need to find the zero of the function:
|
// For the reciprocal, we need to find the zero of the function:
|
||||||
// F(X) = A X - 1 [which has a zero at X = 1/A]
|
// F(X) = A X - 1 [which has a zero at X = 1/A]
|
||||||
@ -11820,7 +11820,7 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
|
|||||||
// Expose the DAG combiner to the target combiner implementations.
|
// Expose the DAG combiner to the target combiner implementations.
|
||||||
TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
|
TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
|
||||||
unsigned Iterations;
|
unsigned Iterations;
|
||||||
if (SDValue Est = TLI.getEstimate(ISD::FSQRT, Op, DCI, Iterations)) {
|
if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations)) {
|
||||||
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
|
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
|
||||||
// For the reciprocal sqrt, we need to find the zero of the function:
|
// For the reciprocal sqrt, we need to find the zero of the function:
|
||||||
// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
|
// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
|
||||||
|
@ -7458,25 +7458,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
|||||||
// Target Optimization Hooks
|
// Target Optimization Hooks
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand,
|
SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
|
||||||
DAGCombinerInfo &DCI,
|
DAGCombinerInfo &DCI,
|
||||||
unsigned &RefinementSteps) const {
|
unsigned &RefinementSteps) const {
|
||||||
EVT VT = Operand.getValueType();
|
EVT VT = Operand.getValueType();
|
||||||
SDValue RV;
|
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
|
||||||
if (Opcode == ISD::FSQRT) {
|
(VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
|
||||||
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
|
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
|
||||||
(VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
|
(VT == MVT::v2f64 && Subtarget.hasVSX())) {
|
||||||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
|
|
||||||
(VT == MVT::v2f64 && Subtarget.hasVSX()))
|
|
||||||
RV = DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
|
|
||||||
} else if (Opcode == ISD::FDIV) {
|
|
||||||
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
|
|
||||||
(VT == MVT::f64 && Subtarget.hasFRE()) ||
|
|
||||||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
|
|
||||||
(VT == MVT::v2f64 && Subtarget.hasVSX()))
|
|
||||||
RV = DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
|
|
||||||
}
|
|
||||||
if (RV.getNode()) {
|
|
||||||
// Convergence is quadratic, so we essentially double the number of digits
|
// Convergence is quadratic, so we essentially double the number of digits
|
||||||
// correct after every iteration. For both FRE and FRSQRTE, the minimum
|
// correct after every iteration. For both FRE and FRSQRTE, the minimum
|
||||||
// architected relative accuracy is 2^-5. When hasRecipPrec(), this is
|
// architected relative accuracy is 2^-5. When hasRecipPrec(), this is
|
||||||
@ -7484,8 +7473,29 @@ SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand,
|
|||||||
RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
|
RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
|
||||||
if (VT.getScalarType() == MVT::f64)
|
if (VT.getScalarType() == MVT::f64)
|
||||||
++RefinementSteps;
|
++RefinementSteps;
|
||||||
|
return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
|
||||||
}
|
}
|
||||||
return RV;
|
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
|
||||||
|
DAGCombinerInfo &DCI,
|
||||||
|
unsigned &RefinementSteps) const {
|
||||||
|
EVT VT = Operand.getValueType();
|
||||||
|
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
|
||||||
|
(VT == MVT::f64 && Subtarget.hasFRE()) ||
|
||||||
|
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
|
||||||
|
(VT == MVT::v2f64 && Subtarget.hasVSX())) {
|
||||||
|
// Convergence is quadratic, so we essentially double the number of digits
|
||||||
|
// correct after every iteration. For both FRE and FRSQRTE, the minimum
|
||||||
|
// architected relative accuracy is 2^-5. When hasRecipPrec(), this is
|
||||||
|
// 2^-14. IEEE float has 23 digits and double has 52 digits.
|
||||||
|
RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
|
||||||
|
if (VT.getScalarType() == MVT::f64)
|
||||||
|
++RefinementSteps;
|
||||||
|
return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
|
||||||
|
}
|
||||||
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
|
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
|
||||||
|
@ -701,9 +701,10 @@ namespace llvm {
|
|||||||
SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
|
SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||||
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
|
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||||
|
|
||||||
SDValue getEstimate(unsigned Opcode, SDValue Operand,
|
SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
|
||||||
DAGCombinerInfo &DCI,
|
unsigned &RefinementSteps) const override;
|
||||||
unsigned &RefinementSteps) const override;
|
SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
|
||||||
|
unsigned &RefinementSteps) const override;
|
||||||
|
|
||||||
CCAssignFn *useFastISelCCs(unsigned Flag) const;
|
CCAssignFn *useFastISelCCs(unsigned Flag) const;
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user