mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-31 10:34:17 +00:00
Add DAG-combines for aggressive FMA formation.
This patch adds DAG combines to form FMAs from pairs of FADD + FMUL or FSUB + FMUL. The combines are performed when: (a) either the AllowExcessFPPrecision option (-enable-excess-fp-precision for llc) or the UnsafeFPMath option (-enable-unsafe-fp-math) is set, (b) TargetLoweringInfo::isFMAFasterThanMulAndAdd(VT) is true for the type of the FADD/FSUB, and (c) the FMUL has only one user (the FADD/FSUB). If your target has fast FMA instructions you can make use of these combines by overriding TargetLoweringInfo::isFMAFasterThanMulAndAdd(VT) to return true for types supported by your FMA instruction, and adding patterns to match ISD::FMA to your FMA instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158757 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
fa8becb6f9
commit
d693cafcfb
@ -35,7 +35,7 @@ namespace llvm {
|
|||||||
TargetOptions()
|
TargetOptions()
|
||||||
: PrintMachineCode(false), NoFramePointerElim(false),
|
: PrintMachineCode(false), NoFramePointerElim(false),
|
||||||
NoFramePointerElimNonLeaf(false), LessPreciseFPMADOption(false),
|
NoFramePointerElimNonLeaf(false), LessPreciseFPMADOption(false),
|
||||||
NoExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false),
|
AllowExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false),
|
||||||
NoNaNsFPMath(false), HonorSignDependentRoundingFPMathOption(false),
|
NoNaNsFPMath(false), HonorSignDependentRoundingFPMathOption(false),
|
||||||
UseSoftFloat(false), NoZerosInBSS(false), JITExceptionHandling(false),
|
UseSoftFloat(false), NoZerosInBSS(false), JITExceptionHandling(false),
|
||||||
JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false),
|
JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false),
|
||||||
@ -74,13 +74,13 @@ namespace llvm {
|
|||||||
unsigned LessPreciseFPMADOption : 1;
|
unsigned LessPreciseFPMADOption : 1;
|
||||||
bool LessPreciseFPMAD() const;
|
bool LessPreciseFPMAD() const;
|
||||||
|
|
||||||
/// NoExcessFPPrecision - This flag is enabled when the
|
/// AllowExcessFPPrecision - This flag is enabled when the
|
||||||
/// -disable-excess-fp-precision flag is specified on the command line.
|
/// -enable-excess-fp-precision flag is specified on the command line. This
|
||||||
/// When this flag is off (the default), the code generator is allowed to
|
/// flag is OFF by default. When it is turned on, the code generator is
|
||||||
/// produce results that are "more precise" than IEEE allows. This includes
|
/// allowed to produce results that are "more precise" than IEEE allows.
|
||||||
/// use of FMA-like operations and use of the X86 FP registers without
|
/// This includes use of FMA-like operations and use of the X86 FP registers
|
||||||
/// rounding all over the place.
|
/// without rounding all over the place.
|
||||||
unsigned NoExcessFPPrecision : 1;
|
unsigned AllowExcessFPPrecision : 1;
|
||||||
|
|
||||||
/// UnsafeFPMath - This flag is enabled when the
|
/// UnsafeFPMath - This flag is enabled when the
|
||||||
/// -enable-unsafe-fp-math flag is specified on the command line. When
|
/// -enable-unsafe-fp-math flag is specified on the command line. When
|
||||||
|
@ -5633,6 +5633,26 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
|
|||||||
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
|
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
|
||||||
N0.getOperand(1), N1));
|
N0.getOperand(1), N1));
|
||||||
|
|
||||||
|
// FADD -> FMA combines:
|
||||||
|
if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
|
||||||
|
DAG.getTarget().Options.UnsafeFPMath) &&
|
||||||
|
DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
|
||||||
|
TLI.isOperationLegal(ISD::FMA, VT)) {
|
||||||
|
|
||||||
|
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
|
||||||
|
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
|
||||||
|
return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
|
||||||
|
N0.getOperand(0), N0.getOperand(1), N1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
|
||||||
|
// Note: Commutes FADD operands.
|
||||||
|
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
|
||||||
|
return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
|
||||||
|
N1.getOperand(0), N1.getOperand(1), N0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5690,6 +5710,29 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FSUB -> FMA combines:
|
||||||
|
if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
|
||||||
|
DAG.getTarget().Options.UnsafeFPMath) &&
|
||||||
|
DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
|
||||||
|
TLI.isOperationLegal(ISD::FMA, VT)) {
|
||||||
|
|
||||||
|
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
|
||||||
|
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
|
||||||
|
return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
|
||||||
|
N0.getOperand(0), N0.getOperand(1),
|
||||||
|
DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
|
||||||
|
// Note: Commutes FSUB operands.
|
||||||
|
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
|
||||||
|
return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
|
||||||
|
DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT,
|
||||||
|
N1.getOperand(0)),
|
||||||
|
N1.getOperand(1), N0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -236,7 +236,7 @@ def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
|
|||||||
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
|
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
|
||||||
// But only select them if more precision in FP computation is allowed.
|
// But only select them if more precision in FP computation is allowed.
|
||||||
// Do not use them for Darwin platforms.
|
// Do not use them for Darwin platforms.
|
||||||
def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && "
|
def UseFusedMAC : Predicate<"TM.Options.AllowExcessFPPrecision && "
|
||||||
"!Subtarget->isTargetDarwin()">;
|
"!Subtarget->isTargetDarwin()">;
|
||||||
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
|
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
|
||||||
"Subtarget->isTargetDarwin()">;
|
"Subtarget->isTargetDarwin()">;
|
||||||
|
@ -353,7 +353,7 @@ def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
|
|||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// PowerPC Instruction Predicate Definitions.
|
// PowerPC Instruction Predicate Definitions.
|
||||||
def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
|
def FPContractions : Predicate<"TM.Options.AllowExcessFPPrecision">;
|
||||||
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
|
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
|
||||||
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
|
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
|
||||||
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
|
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 | FileCheck %s
|
; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -enable-excess-fp-precision | FileCheck %s
|
||||||
; Check generated fused MAC and MLS.
|
; Check generated fused MAC and MLS.
|
||||||
|
|
||||||
define double @fusedMACTest1(double %d1, double %d2, double %d3) {
|
define double @fusedMACTest1(double %d1, double %d2, double %d3) {
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck %s
|
; RUN: llc < %s -march=ppc64 -mcpu=a2 -enable-excess-fp-precision | FileCheck %s
|
||||||
|
|
||||||
%0 = type { double, double }
|
%0 = type { double, double }
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llc < %s -march=ppc32 | \
|
; RUN: llc < %s -march=ppc32 -enable-excess-fp-precision | \
|
||||||
; RUN: egrep {fn?madd|fn?msub} | count 8
|
; RUN: egrep {fn?madd|fn?msub} | count 8
|
||||||
|
|
||||||
define double @test_FMADD1(double %A, double %B, double %C) {
|
define double @test_FMADD1(double %A, double %B, double %C) {
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llc < %s -march=ppc32 -mcpu=440 | FileCheck %s
|
; RUN: llc < %s -march=ppc32 -mcpu=440 -enable-excess-fp-precision | FileCheck %s
|
||||||
|
|
||||||
%0 = type { double, double }
|
%0 = type { double, double }
|
||||||
|
|
||||||
|
@ -156,8 +156,8 @@ DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
|
|||||||
cl::init(false));
|
cl::init(false));
|
||||||
|
|
||||||
static cl::opt<bool>
|
static cl::opt<bool>
|
||||||
DisableExcessPrecision("disable-excess-fp-precision",
|
EnableExcessPrecision("enable-excess-fp-precision",
|
||||||
cl::desc("Disable optimizations that may increase FP precision"),
|
cl::desc("Enable optimizations that may increase FP precision"),
|
||||||
cl::init(false));
|
cl::init(false));
|
||||||
|
|
||||||
static cl::opt<bool>
|
static cl::opt<bool>
|
||||||
@ -404,7 +404,7 @@ int main(int argc, char **argv) {
|
|||||||
Options.LessPreciseFPMADOption = EnableFPMAD;
|
Options.LessPreciseFPMADOption = EnableFPMAD;
|
||||||
Options.NoFramePointerElim = DisableFPElim;
|
Options.NoFramePointerElim = DisableFPElim;
|
||||||
Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf;
|
Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf;
|
||||||
Options.NoExcessFPPrecision = DisableExcessPrecision;
|
Options.AllowExcessFPPrecision = EnableExcessPrecision;
|
||||||
Options.UnsafeFPMath = EnableUnsafeFPMath;
|
Options.UnsafeFPMath = EnableUnsafeFPMath;
|
||||||
Options.NoInfsFPMath = EnableNoInfsFPMath;
|
Options.NoInfsFPMath = EnableNoInfsFPMath;
|
||||||
Options.NoNaNsFPMath = EnableNoNaNsFPMath;
|
Options.NoNaNsFPMath = EnableNoNaNsFPMath;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user