mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-26 23:32:58 +00:00
DAGCombiner: Allow the DAGCombiner to combine multiple FDIVs with the same divisor into FMULs by the reciprocal.
E.g., (a / D; b / D) -> (recip = 1.0 / D; a * recip; b * recip). A hook is added to allow the target to control whether it wants this combine to be performed. Reviewed in http://reviews.llvm.org/D6334. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222510 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e0ed7df6b0
commit
09ad94decb
@ -2652,6 +2652,12 @@ public:
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Indicate whether this target prefers to combine the given number of FDIVs
|
||||||
|
/// with the same divisor.
|
||||||
|
virtual bool combineRepeatedFPDivisors(unsigned NumUsers) const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/// Hooks for building estimates in place of slower divisions and square
|
/// Hooks for building estimates in place of slower divisions and square
|
||||||
/// roots.
|
/// roots.
|
||||||
|
|
||||||
|
@ -7104,6 +7104,44 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
|
||||||
|
// reciprocal.
|
||||||
|
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
|
||||||
|
// Notice that this is not always beneficial. One reason is that different targets
|
||||||
|
// may have different costs for FDIV and FMUL, so sometimes the cost of two
|
||||||
|
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
|
||||||
|
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
|
||||||
|
if (Options.UnsafeFPMath) {
|
||||||
|
// Skip if current node is a reciprocal.
|
||||||
|
if (N0CFP && N0CFP->isExactlyValue(1.0))
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
SmallVector<SDNode *, 4> Users;
|
||||||
|
// Find all FDIV users of the same divisor.
|
||||||
|
for (SDNode::use_iterator UI = N1.getNode()->use_begin(),
|
||||||
|
UE = N1.getNode()->use_end();
|
||||||
|
UI != UE; ++UI) {
|
||||||
|
SDNode *User = UI.getUse().getUser();
|
||||||
|
if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1)
|
||||||
|
Users.push_back(User);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TLI.combineRepeatedFPDivisors(Users.size())) {
|
||||||
|
SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0
|
||||||
|
SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1);
|
||||||
|
|
||||||
|
// Dividend / Divisor -> Dividend * Reciprocal
|
||||||
|
for (auto I = Users.begin(), E = Users.end(); I != E; ++I) {
|
||||||
|
if ((*I)->getOperand(0) != FPOne) {
|
||||||
|
SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT,
|
||||||
|
(*I)->getOperand(0), Reciprocal);
|
||||||
|
DAG.ReplaceAllUsesWith(*I, NewNode.getNode());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8732,6 +8732,12 @@ bool AArch64TargetLowering::useLoadStackGuardNode() const {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
  // Ask for the "multiply by the reciprocal" rewrite only once at least three
  // FDIVs share the same divisor; with one or two users the answer is false.
  const unsigned MinRepeatedDivisors = 3;
  return NumUsers >= MinRepeatedDivisors;
}
|
||||||
|
|
||||||
TargetLoweringBase::LegalizeTypeAction
|
TargetLoweringBase::LegalizeTypeAction
|
||||||
AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
|
AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
|
||||||
MVT SVT = VT.getSimpleVT();
|
MVT SVT = VT.getSimpleVT();
|
||||||
|
@ -440,6 +440,7 @@ private:
|
|||||||
|
|
||||||
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
|
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
|
||||||
std::vector<SDNode *> *Created) const override;
|
std::vector<SDNode *> *Created) const override;
|
||||||
|
bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
|
||||||
|
|
||||||
ConstraintType
|
ConstraintType
|
||||||
getConstraintType(const std::string &Constraint) const override;
|
getConstraintType(const std::string &Constraint) const override;
|
||||||
|
94
test/CodeGen/AArch64/fdiv-combine.ll
Normal file
94
test/CodeGen/AArch64/fdiv-combine.ll
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
; RUN: llc -march=aarch64 < %s | FileCheck %s
|
||||||
|
|
||||||
|
; Following test cases check:
|
||||||
|
; a / D; b / D; c / D;
|
||||||
|
; =>
|
||||||
|
; recip = 1.0 / D; a * recip; b * recip; c * recip;
|
||||||
|
define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
; Three scalar float divisions share the divisor %D: expect exactly one
; division (the reciprocal) followed by three multiplies.
; CHECK-LABEL: three_fdiv_float:
; CHECK: fdiv s
; CHECK-NOT: fdiv
; CHECK: fmul
; CHECK: fmul
; CHECK: fmul
  %div = fdiv float %a, %D
  %div1 = fdiv float %b, %D
  %div2 = fdiv float %c, %D
  tail call void @foo_3f(float %div, float %div1, float %div2)
  ret void
}
|
||||||
|
|
||||||
|
define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
; Three scalar double divisions share the divisor %D: expect exactly one
; division (the reciprocal) followed by three multiplies.
; CHECK-LABEL: three_fdiv_double:
; CHECK: fdiv d
; CHECK-NOT: fdiv
; CHECK: fmul
; CHECK: fmul
; CHECK: fmul
  %div = fdiv double %a, %D
  %div1 = fdiv double %b, %D
  %div2 = fdiv double %c, %D
  tail call void @foo_3d(double %div, double %div1, double %div2)
  ret void
}
|
||||||
|
|
||||||
|
define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; Three <4 x float> divisions share the divisor %D: expect exactly one
; vector division (the reciprocal) followed by three multiplies.
; CHECK-LABEL: three_fdiv_4xfloat:
; CHECK: fdiv v
; CHECK-NOT: fdiv
; CHECK: fmul
; CHECK: fmul
; CHECK: fmul
  %div = fdiv <4 x float> %a, %D
  %div1 = fdiv <4 x float> %b, %D
  %div2 = fdiv <4 x float> %c, %D
  tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
  ret void
}
|
||||||
|
|
||||||
|
define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
; Three <2 x double> divisions share the divisor %D: expect exactly one
; vector division (the reciprocal) followed by three multiplies.
; CHECK-LABEL: three_fdiv_2xdouble:
; CHECK: fdiv v
; CHECK-NOT: fdiv
; CHECK: fmul
; CHECK: fmul
; CHECK: fmul
  %div = fdiv <2 x double> %a, %D
  %div1 = fdiv <2 x double> %b, %D
  %div2 = fdiv <2 x double> %c, %D
  tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
  ret void
}
|
||||||
|
|
||||||
|
; Following test cases check we never combine two FDIVs if neither of them
|
||||||
|
; calculates a reciprocal.
|
||||||
|
define void @two_fdiv_float(float %D, float %a, float %b) #0 {
; Only two float divisions share %D, which is not enough for the AArch64
; hook to request the rewrite: both divisions must remain and no multiply
; may follow them. The register suffix keeps the pattern from matching the
; function's own symbol name in assembler directives.
; CHECK-LABEL: two_fdiv_float:
; CHECK: fdiv s
; CHECK: fdiv s
; CHECK-NOT: fmul
  %div = fdiv float %a, %D
  %div1 = fdiv float %b, %D
  tail call void @foo_2f(float %div, float %div1)
  ret void
}
|
||||||
|
|
||||||
|
define void @two_fdiv_double(double %D, double %a, double %b) #0 {
; Only two double divisions share %D, which is not enough for the AArch64
; hook to request the rewrite: both divisions must remain and no multiply
; may follow them. The register suffix keeps the pattern from matching the
; function's own symbol name in assembler directives.
; CHECK-LABEL: two_fdiv_double:
; CHECK: fdiv d
; CHECK: fdiv d
; CHECK-NOT: fmul
  %div = fdiv double %a, %D
  %div1 = fdiv double %b, %D
  tail call void @foo_2d(double %div, double %div1)
  ret void
}
|
||||||
|
|
||||||
|
declare void @foo_3f(float, float, float)
|
||||||
|
declare void @foo_3d(double, double, double)
|
||||||
|
declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
|
||||||
|
declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
|
||||||
|
declare void @foo_2f(float, float)
|
||||||
|
declare void @foo_2d(double, double)
|
||||||
|
|
||||||
|
attributes #0 = { "unsafe-fp-math"="true" }
|
Loading…
x
Reference in New Issue
Block a user