diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 54d8b99ad45..e6c4634079c 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -268,6 +268,13 @@ public: return HasFloatingPointExceptions; } + /// Return true if target always beneficiates from combining into FMA for a + /// given value type. This must typically return false on targets where FMA + /// takes more cycles to execute than FADD. + virtual bool enableAggressiveFMAFusion(EVT VT) const { + return false; + } + /// Return the ValueType of the result of SETCC operations. Also used to /// obtain the target's preferred type for the condition operand of SELECT and /// BRCOND nodes. In the case of BRCOND the argument passed is MVT::Other diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 013fbb0b004..aa2f2d1f2b1 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6684,13 +6684,15 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) + if (N0.getOpcode() == ISD::FMUL && + (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1); // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) + if (N1.getOpcode() == ISD::FMUL && + (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); } @@ -6762,14 +6764,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) + if (N0.getOpcode() == ISD::FMUL && + (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(ISD::FNEG, dl, VT, N1)); // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) + if (N1.getOpcode() == ISD::FMUL && + (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, dl, VT, DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), @@ -6778,7 +6782,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0).getOpcode() == ISD::FMUL && - N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { + ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) || + TLI.enableAggressiveFMAFusion(VT))) { SDValue N00 = N0.getOperand(0).getOperand(0); SDValue N01 = N0.getOperand(0).getOperand(1); return DAG.getNode(ISD::FMA, dl, VT, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 6eded03313d..fd188fe37e2 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -826,6 +826,11 @@ EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { return VT.changeVectorElementTypeToInteger(); } +bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { + assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); + return true; +} + //===----------------------------------------------------------------------===// // Node matching predicates, for use by the tblgen matching code. //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index de8f8366f96..c53dc83fa8a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -360,6 +360,11 @@ namespace llvm { /// getSetCCResultType - Return the ISD::SETCC ValueType EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + /// Return true if target always beneficiates from combining into FMA for a + /// given value type. This must typically return false on targets where FMA + /// takes more cycles to execute than FADD. + bool enableAggressiveFMAFusion(EVT VT) const override; + /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll index db19761b431..6bef3acf64b 100644 --- a/test/CodeGen/PowerPC/fma.ll +++ b/test/CodeGen/PowerPC/fma.ll @@ -1,8 +1,12 @@ ; RUN: llc < %s -march=ppc32 -fp-contract=fast | FileCheck %s +declare double @dummy1(double) #0 +declare double @dummy2(double, double) #0 +declare double @dummy3(double, double, double) #0 + define double @test_FMADD1(double %A, double %B, double %C) { %D = fmul double %A, %B ; [#uses=1] - %E = fadd double %D, %C ; [#uses=1] + %E = fadd double %C, %D ; [#uses=1] ret double %E ; CHECK-LABEL: test_FMADD1: ; CHECK: fmadd @@ -18,15 +22,26 @@ define double @test_FMADD2(double %A, double %B, double %C) { ; CHECK-NEXT: blr } -define double @test_FMSUB(double %A, double %B, double %C) { +define double @test_FMSUB1(double %A, double %B, double %C) { %D = fmul double %A, %B ; [#uses=1] %E = fsub double %D, %C ; [#uses=1] ret double %E -; CHECK-LABEL: test_FMSUB: +; CHECK-LABEL: test_FMSUB1: ; CHECK: fmsub ; CHECK-NEXT: blr } +define double @test_FMSUB2(double %A, double %B, double %C, double %D) { + %E = fmul double %A, %B ; [#uses=2] + %F = fadd double %E, %C ; [#uses=1] + %G = fsub double %E, %D ; [#uses=1] + %H = call double @dummy2(double %F, double %G) ; [#uses=1] + ret double %H +; CHECK-LABEL: test_FMSUB2: +; CHECK: fmadd +; CHECK-NEXT: fmsub +} + define double @test_FNMADD1(double %A, double %B, double %C) { %D = fmul double %A, %B ; [#uses=1] %E = fadd double %D, %C ; [#uses=1]