mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-11-01 15:17:25 +00:00
R600: Implement getRecipEstimate
This requires a new hook to prevent expanding sqrt in terms of rsqrt and reciprocal. v_rcp_f32, v_rsq_f32, and v_sqrt_f32 all execute at the same rate, so this expansion would just double the number of instructions and cycles.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225828 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -403,6 +403,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
||||
// large sequence of instructions.
|
||||
setIntDivIsCheap(false);
|
||||
setPow2SDivIsCheap(false);
|
||||
setFsqrtIsCheap(true);
|
||||
|
||||
// FIXME: Need to really handle these.
|
||||
MaxStoresPerMemcpy = 4096;
|
||||
@@ -2585,6 +2586,28 @@ SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Build a reciprocal estimate of \p Operand.
///
/// For an f32 operand this emits an AMDGPUISD::RCP node and sets
/// \p RefinementSteps to 0. For any other value type it returns an empty
/// SDValue and leaves \p RefinementSteps untouched.
SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
|
||||
DAGCombinerInfo &DCI,
|
||||
unsigned &RefinementSteps) const {
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
EVT VT = Operand.getValueType();
|
||||
|
||||
if (VT == MVT::f32) {
|
||||
// Reciprocal, < 1 ulp error.
|
||||
//
|
||||
// This reciprocal approximation converges to < 0.5 ulp error with one
|
||||
// Newton-Raphson step performed with two fused multiply-adds (FMAs).
|
||||
|
||||
// Request no refinement iterations: the RCP node is returned as-is.
// NOTE(review): presumably < 1 ulp is considered accurate enough here.
RefinementSteps = 0;
|
||||
return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
|
||||
}
|
||||
|
||||
// TODO: There is also an f64 rcp instruction, but the documentation is less
|
||||
// clear on its precision.
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
static void computeKnownBitsForMinMax(const SDValue Op0,
|
||||
const SDValue Op1,
|
||||
APInt &KnownZero,
|
||||
|
||||
Reference in New Issue
Block a user