mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-19 04:32:19 +00:00
Optionally enable more-aggressive FMA formation in DAGCombine
The heuristic used by DAGCombine to form FMAs checks that the FMUL has only one use, but this is overly-conservative on some systems. Specifically, if the FMA and the FADD have the same latency (and the FMA does not compete for resources with the FMUL any more than the FADD does), there is no need for the restriction, and furthermore, forming the FMA leaving the FMUL can still allow for higher overall throughput and decreased critical-path length. Here we add a new TLI callback, enableAggressiveFMAFusion, false by default, to elide the hasOneUse check. This is enabled for PowerPC by default, as most PowerPC systems will benefit. Patch by Olivier Sallenave, thanks! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218120 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
89436b4160
commit
c404e8208c
@ -268,6 +268,13 @@ public:
|
||||
return HasFloatingPointExceptions;
|
||||
}
|
||||
|
||||
/// Return true if target always benefits from combining into FMA for a
|
||||
/// given value type. This must typically return false on targets where FMA
|
||||
/// takes more cycles to execute than FADD.
|
||||
virtual bool enableAggressiveFMAFusion(EVT VT) const {
|
||||
// Conservative default: aggressive fusion is off unless a target overrides
// this hook, so DAGCombine keeps requiring the FMUL to have a single use.
return false;
|
||||
}
|
||||
|
||||
/// Return the ValueType of the result of SETCC operations. Also used to
|
||||
/// obtain the target's preferred type for the condition operand of SELECT and
|
||||
/// BRCOND nodes. In the case of BRCOND the argument passed is MVT::Other
|
||||
|
@ -6684,13 +6684,15 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
|
||||
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
|
||||
|
||||
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
|
||||
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
|
||||
if (N0.getOpcode() == ISD::FMUL &&
|
||||
(N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
|
||||
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
|
||||
N0.getOperand(0), N0.getOperand(1), N1);
|
||||
|
||||
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
|
||||
// Note: Commutes FADD operands.
|
||||
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
|
||||
if (N1.getOpcode() == ISD::FMUL &&
|
||||
(N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
|
||||
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
|
||||
N1.getOperand(0), N1.getOperand(1), N0);
|
||||
}
|
||||
@ -6762,14 +6764,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
|
||||
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
|
||||
|
||||
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
|
||||
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
|
||||
if (N0.getOpcode() == ISD::FMUL &&
|
||||
(N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
|
||||
return DAG.getNode(ISD::FMA, dl, VT,
|
||||
N0.getOperand(0), N0.getOperand(1),
|
||||
DAG.getNode(ISD::FNEG, dl, VT, N1));
|
||||
|
||||
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
|
||||
// Note: Commutes FSUB operands.
|
||||
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
|
||||
if (N1.getOpcode() == ISD::FMUL &&
|
||||
(N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
|
||||
return DAG.getNode(ISD::FMA, dl, VT,
|
||||
DAG.getNode(ISD::FNEG, dl, VT,
|
||||
N1.getOperand(0)),
|
||||
@ -6778,7 +6782,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
|
||||
// fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
|
||||
if (N0.getOpcode() == ISD::FNEG &&
|
||||
N0.getOperand(0).getOpcode() == ISD::FMUL &&
|
||||
N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
|
||||
((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
|
||||
TLI.enableAggressiveFMAFusion(VT))) {
|
||||
SDValue N00 = N0.getOperand(0).getOperand(0);
|
||||
SDValue N01 = N0.getOperand(0).getOperand(1);
|
||||
return DAG.getNode(ISD::FMA, dl, VT,
|
||||
|
@ -826,6 +826,11 @@ EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
|
||||
return VT.changeVectorElementTypeToInteger();
|
||||
}
|
||||
|
||||
// PowerPC override of the aggressive-FMA-fusion hook: always answers true, so
// DAGCombine may form an FMA even when the FMUL has other users.
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
|
||||
// Sanity check only: the hook is expected to be queried for FP types.
assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
|
||||
// Opt in unconditionally; the result does not depend on VT.
return true;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Node matching predicates, for use by the tblgen matching code.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -360,6 +360,11 @@ namespace llvm {
|
||||
/// getSetCCResultType - Return the ISD::SETCC ValueType
|
||||
EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
|
||||
|
||||
/// Return true if target always benefits from combining into FMA for a
|
||||
/// given value type. This must typically return false on targets where FMA
|
||||
/// takes more cycles to execute than FADD.
|
||||
bool enableAggressiveFMAFusion(EVT VT) const override;
|
||||
|
||||
/// getPreIndexedAddressParts - returns true by value, base pointer and
|
||||
/// offset pointer and addressing mode by reference if the node's address
|
||||
/// can be legally represented as pre-indexed load / store address.
|
||||
|
@ -1,8 +1,12 @@
|
||||
; RUN: llc < %s -march=ppc32 -fp-contract=fast | FileCheck %s
|
||||
|
||||
declare double @dummy1(double) #0
|
||||
declare double @dummy2(double, double) #0
|
||||
declare double @dummy3(double, double, double) #0
|
||||
|
||||
define double @test_FMADD1(double %A, double %B, double %C) {
|
||||
%D = fmul double %A, %B ; <double> [#uses=1]
|
||||
%E = fadd double %D, %C ; <double> [#uses=1]
|
||||
%E = fadd double %C, %D ; <double> [#uses=1]
|
||||
ret double %E
|
||||
; CHECK-LABEL: test_FMADD1:
|
||||
; CHECK: fmadd
|
||||
@ -18,15 +22,26 @@ define double @test_FMADD2(double %A, double %B, double %C) {
|
||||
; CHECK-NEXT: blr
|
||||
}
|
||||
|
||||
define double @test_FMSUB(double %A, double %B, double %C) {
|
||||
define double @test_FMSUB1(double %A, double %B, double %C) {
|
||||
%D = fmul double %A, %B ; <double> [#uses=1]
|
||||
%E = fsub double %D, %C ; <double> [#uses=1]
|
||||
ret double %E
|
||||
; CHECK-LABEL: test_FMSUB:
|
||||
; CHECK-LABEL: test_FMSUB1:
|
||||
; CHECK: fmsub
|
||||
; CHECK-NEXT: blr
|
||||
}
|
||||
|
||||
; The product %E feeds both an fadd and an fsub (two uses). The CHECK lines
; below expect an fmadd immediately followed by an fmsub in the output.
define double @test_FMSUB2(double %A, double %B, double %C, double %D) {
|
||||
%E = fmul double %A, %B ; <double> [#uses=2]
|
||||
%F = fadd double %E, %C ; <double> [#uses=1]
|
||||
%G = fsub double %E, %D ; <double> [#uses=1]
|
||||
%H = call double @dummy2(double %F, double %G) ; <double> [#uses=1]
|
||||
ret double %H
|
||||
; CHECK-LABEL: test_FMSUB2:
|
||||
; CHECK: fmadd
|
||||
; CHECK-NEXT: fmsub
|
||||
}
|
||||
|
||||
define double @test_FNMADD1(double %A, double %B, double %C) {
|
||||
%D = fmul double %A, %B ; <double> [#uses=1]
|
||||
%E = fadd double %D, %C ; <double> [#uses=1]
|
||||
|
Loading…
x
Reference in New Issue
Block a user