From 09ad94decbd23c28378b529a76cbf1950e064111 Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Fri, 21 Nov 2014 06:39:58 +0000 Subject: [PATCH] DAGCombiner: Allow the DAGCombiner to combine multiple FDIVs with the same divisor info FMULs by the reciprocal. E.g., ( a / D; b / D ) -> ( recip = 1.0 / D; a * recip; b * recip) A hook is added to allow the target to control whether it needs to do such combine. Reviewed in http://reviews.llvm.org/D6334 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222510 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 6 ++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 38 +++++++++ lib/Target/AArch64/AArch64ISelLowering.cpp | 6 ++ lib/Target/AArch64/AArch64ISelLowering.h | 1 + test/CodeGen/AArch64/fdiv-combine.ll | 94 ++++++++++++++++++++++ 5 files changed, 145 insertions(+) create mode 100644 test/CodeGen/AArch64/fdiv-combine.ll diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 882dab40e41..c0f2395eca6 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -2652,6 +2652,12 @@ public: return SDValue(); } + /// Indicate whether this target prefers to combine the given number of FDIVs + /// with the same divisor. + virtual bool combineRepeatedFPDivisors(unsigned NumUsers) const { + return false; + } + /// Hooks for building estimates in place of slower divisions and square /// roots. diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a1291ed6e49..860423a963b 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7104,6 +7104,44 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } } + // Combine multiple FDIVs with the same divisor into multiple FMULs by the + // reciprocal. + // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) + // Notice that this is not always beneficial. One reason is different target + // may have different costs for FDIV and FMUL, so sometimes the cost of two + // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason + // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". + if (Options.UnsafeFPMath) { + // Skip if current node is a reciprocal. + if (N0CFP && N0CFP->isExactlyValue(1.0)) + return SDValue(); + + SmallVector Users; + // Find all FDIV users of the same divisor. + for (SDNode::use_iterator UI = N1.getNode()->use_begin(), + UE = N1.getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = UI.getUse().getUser(); + if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1) + Users.push_back(User); + } + + if (TLI.combineRepeatedFPDivisors(Users.size())) { + SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0 + SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1); + + // Dividend / Divisor -> Dividend * Reciprocal + for (auto I = Users.begin(), E = Users.end(); I != E; ++I) { + if ((*I)->getOperand(0) != FPOne) { + SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT, + (*I)->getOperand(0), Reciprocal); + DAG.ReplaceAllUsesWith(*I, NewNode.getNode()); + } + } + return SDValue(); + } + } + return SDValue(); } diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 7c94d831cba..70cf7599a39 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8732,6 +8732,12 @@ bool AArch64TargetLowering::useLoadStackGuardNode() const { return true; } +bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { + // Combine multiple FDIVs with the same divisor into multiple FMULs by the + // reciprocal if there are three or more FDIVs. + return NumUsers > 2; +} + TargetLoweringBase::LegalizeTypeAction AArch64TargetLowering::getPreferredVectorAction(EVT VT) const { MVT SVT = VT.getSimpleVT(); diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 2f5708dd897..c76f6a865dc 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -440,6 +440,7 @@ private: SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector *Created) const override; + bool combineRepeatedFPDivisors(unsigned NumUsers) const override; ConstraintType getConstraintType(const std::string &Constraint) const override; diff --git a/test/CodeGen/AArch64/fdiv-combine.ll b/test/CodeGen/AArch64/fdiv-combine.ll new file mode 100644 index 00000000000..389eefd97b2 --- /dev/null +++ b/test/CodeGen/AArch64/fdiv-combine.ll @@ -0,0 +1,94 @@ +; RUN: llc -march=aarch64 < %s | FileCheck %s + +; Following test cases check: +; a / D; b / D; c / D; +; => +; recip = 1.0 / D; a * recip; b * recip; c * recip; +define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 { +; CHECK-LABEL: three_fdiv_float: +; CHECK: fdiv +; CHECK-NEXT-NOT: fdiv +; CHECK: fmul +; CHECK: fmul +; CHECK: fmul + %div = fdiv float %a, %D + %div1 = fdiv float %b, %D + %div2 = fdiv float %c, %D + tail call void @foo_3f(float %div, float %div1, float %div2) + ret void +} + +define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 { +; CHECK-LABEL: three_fdiv_double: +; CHECK: fdiv +; CHECK-NEXT-NOT: fdiv +; CHECK: fmul +; CHECK: fmul +; CHECK: fmul + %div = fdiv double %a, %D + %div1 = fdiv double %b, %D + %div2 = fdiv double %c, %D + tail call void @foo_3d(double %div, double %div1, double %div2) + ret void +} + +define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +; CHECK-LABEL: three_fdiv_4xfloat: +; CHECK: fdiv +; CHECK-NEXT-NOT: fdiv +; CHECK: fmul +; CHECK: fmul +; CHECK: fmul + %div = fdiv <4 x float> %a, %D + %div1 = fdiv <4 x float> %b, %D + %div2 = fdiv <4 x float> %c, %D + tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2) + ret void +} + +define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 { +; CHECK-LABEL: three_fdiv_2xdouble: +; CHECK: fdiv +; CHECK-NEXT-NOT: fdiv +; CHECK: fmul +; CHECK: fmul +; CHECK: fmul + %div = fdiv <2 x double> %a, %D + %div1 = fdiv <2 x double> %b, %D + %div2 = fdiv <2 x double> %c, %D + tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2) + ret void +} + +; Following test cases check we never combine two FDIVs if neither of them +; calculates a reciprocal. +define void @two_fdiv_float(float %D, float %a, float %b) #0 { +; CHECK-LABEL: two_fdiv_float: +; CHECK: fdiv +; CHECK: fdiv +; CHECK-NEXT-NOT: fmul + %div = fdiv float %a, %D + %div1 = fdiv float %b, %D + tail call void @foo_2f(float %div, float %div1) + ret void +} + +define void @two_fdiv_double(double %D, double %a, double %b) #0 { +; CHECK-LABEL: two_fdiv_double: +; CHECK: fdiv +; CHECK: fdiv +; CHECK-NEXT-NOT: fmul + %div = fdiv double %a, %D + %div1 = fdiv double %b, %D + tail call void @foo_2d(double %div, double %div1) + ret void +} + +declare void @foo_3f(float, float, float) +declare void @foo_3d(double, double, double) +declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>) +declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>) +declare void @foo_2f(float, float) +declare void @foo_2d(double, double) + +attributes #0 = { "unsafe-fp-math"="true" }