mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-12 02:33:33 +00:00
R600: Implement getRecipEstimate
This requires a new hook to prevent expanding sqrt in terms of rsqrt and reciprocal. v_rcp_f32, v_rsq_f32, and v_sqrt_f32 are all the same rate, so this expansion would just double the number of instructions and cycles. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225828 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8603a3d1c5
commit
7c06364dc0
@ -217,6 +217,11 @@ public:
|
|||||||
/// several shifts, adds, and multiplies for this target.
|
/// several shifts, adds, and multiplies for this target.
|
||||||
bool isIntDivCheap() const { return IntDivIsCheap; }
|
bool isIntDivCheap() const { return IntDivIsCheap; }
|
||||||
|
|
||||||
|
/// Return true if sqrt(x) is as cheap or cheaper than 1 / rsqrt(x).
|
||||||
|
bool isFsqrtCheap() const {
|
||||||
|
return FsqrtIsCheap;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if target has indicated at least one type should be bypassed.
|
/// Returns true if target has indicated at least one type should be bypassed.
|
||||||
bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
|
bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
|
||||||
|
|
||||||
@ -1183,7 +1188,11 @@ protected:
|
|||||||
/// possible, should be replaced by an alternate sequence of instructions not
|
/// possible, should be replaced by an alternate sequence of instructions not
|
||||||
/// containing an integer divide.
|
/// containing an integer divide.
|
||||||
void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; }
|
void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; }
|
||||||
|
|
||||||
|
/// Tells the code generator that fsqrt is cheap, and should not be replaced
|
||||||
|
/// with an alternative sequence of instructions.
|
||||||
|
void setFsqrtIsCheap(bool isCheap = true) { FsqrtIsCheap = isCheap; }
|
||||||
|
|
||||||
/// Tells the code generator that this target supports floating point
|
/// Tells the code generator that this target supports floating point
|
||||||
/// exceptions and cares about preserving floating point exception behavior.
|
/// exceptions and cares about preserving floating point exception behavior.
|
||||||
void setHasFloatingPointExceptions(bool FPExceptions = true) {
|
void setHasFloatingPointExceptions(bool FPExceptions = true) {
|
||||||
@ -1625,6 +1634,9 @@ private:
|
|||||||
/// unconditionally.
|
/// unconditionally.
|
||||||
bool IntDivIsCheap;
|
bool IntDivIsCheap;
|
||||||
|
|
||||||
|
// Don't expand fsqrt with an approximation based on the inverse sqrt.
|
||||||
|
bool FsqrtIsCheap;
|
||||||
|
|
||||||
/// Tells the code generator to bypass slow divide or remainder
|
/// Tells the code generator to bypass slow divide or remainder
|
||||||
/// instructions. For example, BypassSlowDivWidths[32,8] tells the code
|
/// instructions. For example, BypassSlowDivWidths[32,8] tells the code
|
||||||
/// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer
|
/// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer
|
||||||
|
@ -7538,7 +7538,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
|
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
|
||||||
if (DAG.getTarget().Options.UnsafeFPMath) {
|
if (DAG.getTarget().Options.UnsafeFPMath &&
|
||||||
|
!TLI.isFsqrtCheap()) {
|
||||||
// Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
|
// Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
|
||||||
if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
|
if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
|
||||||
EVT VT = RV.getValueType();
|
EVT VT = RV.getValueType();
|
||||||
|
@ -710,6 +710,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
|
|||||||
HasMultipleConditionRegisters = false;
|
HasMultipleConditionRegisters = false;
|
||||||
HasExtractBitsInsn = false;
|
HasExtractBitsInsn = false;
|
||||||
IntDivIsCheap = false;
|
IntDivIsCheap = false;
|
||||||
|
FsqrtIsCheap = false;
|
||||||
Pow2SDivIsCheap = false;
|
Pow2SDivIsCheap = false;
|
||||||
JumpIsExpensive = false;
|
JumpIsExpensive = false;
|
||||||
PredictableSelectIsExpensive = false;
|
PredictableSelectIsExpensive = false;
|
||||||
|
@ -403,6 +403,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
|||||||
// large sequence of instructions.
|
// large sequence of instructions.
|
||||||
setIntDivIsCheap(false);
|
setIntDivIsCheap(false);
|
||||||
setPow2SDivIsCheap(false);
|
setPow2SDivIsCheap(false);
|
||||||
|
setFsqrtIsCheap(true);
|
||||||
|
|
||||||
// FIXME: Need to really handle these.
|
// FIXME: Need to really handle these.
|
||||||
MaxStoresPerMemcpy = 4096;
|
MaxStoresPerMemcpy = 4096;
|
||||||
@ -2585,6 +2586,28 @@ SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
|
||||||
|
DAGCombinerInfo &DCI,
|
||||||
|
unsigned &RefinementSteps) const {
|
||||||
|
SelectionDAG &DAG = DCI.DAG;
|
||||||
|
EVT VT = Operand.getValueType();
|
||||||
|
|
||||||
|
if (VT == MVT::f32) {
|
||||||
|
// Reciprocal, < 1 ulp error.
|
||||||
|
//
|
||||||
|
// This reciprocal approximation converges to < 0.5 ulp error with one
|
||||||
|
// Newton-Raphson iteration performed with two fused multiply-adds (FMAs).
|
||||||
|
|
||||||
|
RefinementSteps = 0;
|
||||||
|
return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: There is also an f64 rcp instruction, but the documentation is less
|
||||||
|
// clear on its precision.
|
||||||
|
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
static void computeKnownBitsForMinMax(const SDValue Op0,
|
static void computeKnownBitsForMinMax(const SDValue Op0,
|
||||||
const SDValue Op1,
|
const SDValue Op1,
|
||||||
APInt &KnownZero,
|
APInt &KnownZero,
|
||||||
|
@ -171,6 +171,9 @@ public:
|
|||||||
DAGCombinerInfo &DCI,
|
DAGCombinerInfo &DCI,
|
||||||
unsigned &RefinementSteps,
|
unsigned &RefinementSteps,
|
||||||
bool &UseOneConstNR) const override;
|
bool &UseOneConstNR) const override;
|
||||||
|
SDValue getRecipEstimate(SDValue Operand,
|
||||||
|
DAGCombinerInfo &DCI,
|
||||||
|
unsigned &RefinementSteps) const override;
|
||||||
|
|
||||||
virtual SDNode *PostISelFolding(MachineSDNode *N,
|
virtual SDNode *PostISelFolding(MachineSDNode *N,
|
||||||
SelectionDAG &DAG) const {
|
SelectionDAG &DAG) const {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user