Add generic fmad DAG node.

This allows the FMA-forming combines to be shared
with instructions that have the same semantics as a separate
multiply and add.

This is set to Expand by default, and is only formed post-legalization,
so it shouldn't have much impact on targets that do not want it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230070 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2015-02-20 22:10:33 +00:00
parent d5b5198e6d
commit 4bacfe2095
6 changed files with 172 additions and 95 deletions

View File

@ -229,7 +229,14 @@ namespace ISD {
SMULO, UMULO, SMULO, UMULO,
/// Simple binary floating point operators. /// Simple binary floating point operators.
FADD, FSUB, FMUL, FMA, FDIV, FREM, FADD, FSUB, FMUL, FDIV, FREM,
/// FMA - Perform a * b + c with no intermediate rounding step.
FMA,
/// FMAD - Perform a * b + c, while getting the same result as the
/// separately rounded operations.
FMAD,
/// FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This /// FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This
/// DAG node does not require that X and Y have the same type, just that the /// DAG node does not require that X and Y have the same type, just that the

View File

@ -381,6 +381,7 @@ def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>;
def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>; def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>;
def frem : SDNode<"ISD::FREM" , SDTFPBinOp>; def frem : SDNode<"ISD::FREM" , SDTFPBinOp>;
def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp>; def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp>;
def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp>;
def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>; def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>;
def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp>; def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp>;
def fmaxnum : SDNode<"ISD::FMAXNUM" , SDTFPBinOp>; def fmaxnum : SDNode<"ISD::FMAXNUM" , SDTFPBinOp>;

View File

@ -6938,6 +6938,133 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
} }
/// Attempt the different variants of (fadd (fmul a, b), c) -> fma or fmad.
///
/// \param FusedOpcode  Opcode of the fused multiply-add node to create.
/// \param Aggressive   When true, fuse even if the FMUL has more than one
///                     use, and additionally try the nested folds that push
///                     the addend into an existing fused node.
/// \param N            The node being combined; its operands 0 and 1 are the
///                     two addends.
/// \param TLI          Currently unused by this function.
/// \param DAG          The DAG in which the replacement nodes are created.
/// \returns The fused replacement value, or a null SDValue if no pattern
///          matched.
static SDValue performFaddFmulCombines(unsigned FusedOpcode,
                                       bool Aggressive,
                                       SDNode *N,
                                       const TargetLowering &TLI,
                                       SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (N0.getOpcode() == ISD::FMUL &&
      (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(FusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FMUL &&
      (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(FusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0);
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // The nested folds below rebuild an existing fused node, so they must only
    // match a node of the same kind we are creating (FusedOpcode). Matching a
    // fixed opcode here would silently convert one kind of fused node into
    // another when FusedOpcode differs from it.

    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
    if (N0.getOpcode() == FusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL) {
      return DAG.getNode(FusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(FusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1));
    }

    // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
    if (N1.getOpcode() == FusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL) {
      return DAG.getNode(FusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(FusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0));
    }
  }

  return SDValue();
}
/// Attempt the different variants of (fsub ..., ...) -> fma or fmad where one
/// side of the subtraction is, or wraps, an FMUL.
///
/// \param FusedOpcode  Opcode of the fused multiply-add node to create; the
///                     nested folds also only match existing nodes with this
///                     opcode.
/// \param Aggressive   When true, fuse even if the FMUL (or the FNEG wrapping
///                     it) has more than one use, and additionally try the
///                     nested folds.
/// \param N            The node being combined; operand 0 is the left-hand
///                     side and operand 1 the right-hand side of the
///                     subtraction.
/// \param TLI          Currently unused by this function.
/// \param DAG          The DAG in which the replacement nodes are created.
/// \returns The fused replacement value, or a null SDValue if no pattern
///          matched.
static SDValue performFsubFmulCombines(unsigned FusedOpcode,
                                       bool Aggressive,
                                       SDNode *N,
                                       const TargetLowering &TLI,
                                       SelectionDAG &DAG) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  if (N->getOperand(0).getOpcode() == ISD::FMUL &&
      (Aggressive || LHS->hasOneUse())) {
    SDValue NegRHS = DAG.getNode(ISD::FNEG, DL, VT, RHS);
    return DAG.getNode(FusedOpcode, DL, VT,
                       LHS.getOperand(0), LHS.getOperand(1), NegRHS);
  }

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  if (RHS.getOpcode() == ISD::FMUL &&
      (Aggressive || RHS->hasOneUse())) {
    SDValue NegMul0 = DAG.getNode(ISD::FNEG, DL, VT, RHS.getOperand(0));
    return DAG.getNode(FusedOpcode, DL, VT, NegMul0, RHS.getOperand(1), LHS);
  }

  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (LHS.getOpcode() == ISD::FNEG &&
      LHS.getOperand(0).getOpcode() == ISD::FMUL &&
      (Aggressive || (LHS->hasOneUse() && LHS.getOperand(0).hasOneUse()))) {
    SDValue Mul0 = LHS.getOperand(0).getOperand(0);
    SDValue Mul1 = LHS.getOperand(0).getOperand(1);
    SDValue NegMul0 = DAG.getNode(ISD::FNEG, DL, VT, Mul0);
    SDValue NegRHS = DAG.getNode(ISD::FNEG, DL, VT, RHS);
    return DAG.getNode(FusedOpcode, DL, VT, NegMul0, Mul1, NegRHS);
  }

  // The remaining folds are only tried when the target permits them.
  if (!Aggressive)
    return SDValue();

  // fold (fsub (fma x, y, (fmul u, v)), z)
  //   -> (fma x, y, (fma u, v, (fneg z)))
  if (LHS.getOpcode() == FusedOpcode &&
      LHS.getOperand(2).getOpcode() == ISD::FMUL) {
    SDValue InnerMul = LHS.getOperand(2);
    SDValue NegRHS = DAG.getNode(ISD::FNEG, DL, VT, RHS);
    SDValue InnerFused = DAG.getNode(FusedOpcode, DL, VT,
                                     InnerMul.getOperand(0),
                                     InnerMul.getOperand(1),
                                     NegRHS);
    return DAG.getNode(FusedOpcode, DL, VT,
                       LHS.getOperand(0), LHS.getOperand(1), InnerFused);
  }

  // fold (fsub x, (fma y, z, (fmul u, v)))
  //   -> (fma (fneg y), z, (fma (fneg u), v, x))
  if (RHS.getOpcode() == FusedOpcode &&
      RHS.getOperand(2).getOpcode() == ISD::FMUL) {
    SDValue InnerMul0 = RHS.getOperand(2).getOperand(0);
    SDValue InnerMul1 = RHS.getOperand(2).getOperand(1);
    SDValue NegOuter0 = DAG.getNode(ISD::FNEG, DL, VT, RHS.getOperand(0));
    SDValue NegInner0 = DAG.getNode(ISD::FNEG, DL, VT, InnerMul0);
    SDValue InnerFused = DAG.getNode(FusedOpcode, DL, VT,
                                     NegInner0, InnerMul1, LHS);
    return DAG.getNode(FusedOpcode, DL, VT,
                       NegOuter0, RHS.getOperand(1), InnerFused);
  }

  return SDValue();
}
SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0); SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1); SDValue N1 = N->getOperand(1);
@ -7077,23 +7204,27 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
} }
} // enable-unsafe-fp-math } // enable-unsafe-fp-math
if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
// Assume if there is an fmad instruction that it should be aggressively
// used.
if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N, TLI, DAG))
return Fused;
}
// FADD -> FMA combines: // FADD -> FMA combines:
if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
TLI.isFMAFasterThanFMulAndFAdd(VT) && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z) if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
if (N0.getOpcode() == ISD::FMUL && // Don't form FMA if we are preferring FMAD.
(N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) if (SDValue Fused
return DAG.getNode(ISD::FMA, SDLoc(N), VT, = performFaddFmulCombines(ISD::FMA,
N0.getOperand(0), N0.getOperand(1), N1); TLI.enableAggressiveFMAFusion(VT),
N, TLI, DAG)) {
// fold (fadd x, (fmul y, z)) -> (fma y, z, x) return Fused;
// Note: Commutes FADD operands. }
if (N1.getOpcode() == ISD::FMUL && }
(N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
N1.getOperand(0), N1.getOperand(1), N0);
// When FP_EXTEND nodes are free on the target, and there is an opportunity // When FP_EXTEND nodes are free on the target, and there is an opportunity
// to combine into FMA, arrange such nodes accordingly. // to combine into FMA, arrange such nodes accordingly.
@ -7122,30 +7253,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N10.getOperand(1)), N0); N10.getOperand(1)), N0);
} }
} }
// More folding opportunities when target permits.
if (TLI.enableAggressiveFMAFusion(VT)) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
if (N0.getOpcode() == ISD::FMA &&
N0.getOperand(2).getOpcode() == ISD::FMUL)
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FMA, SDLoc(N), VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
N1));
// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
if (N1->getOpcode() == ISD::FMA &&
N1.getOperand(2).getOpcode() == ISD::FMUL)
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
N1.getOperand(0), N1.getOperand(1),
DAG.getNode(ISD::FMA, SDLoc(N), VT,
N1.getOperand(2).getOperand(0),
N1.getOperand(2).getOperand(1),
N0));
}
} }
return SDValue(); return SDValue();
@ -7207,43 +7314,32 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
} }
} }
if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
// Assume if there is an fmad instruction that it should be aggressively
// used.
if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N, TLI, DAG))
return Fused;
}
// FSUB -> FMA combines: // FSUB -> FMA combines:
if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
TLI.isFMAFasterThanFMulAndFAdd(VT) && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
if (N0.getOpcode() == ISD::FMUL && // Don't form FMA if we are preferring FMAD.
(N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
return DAG.getNode(ISD::FMA, dl, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FNEG, dl, VT, N1));
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) if (SDValue Fused
// Note: Commutes FSUB operands. = performFsubFmulCombines(ISD::FMA,
if (N1.getOpcode() == ISD::FMUL && TLI.enableAggressiveFMAFusion(VT),
(N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) N, TLI, DAG)) {
return DAG.getNode(ISD::FMA, dl, VT, return Fused;
DAG.getNode(ISD::FNEG, dl, VT, }
N1.getOperand(0)),
N1.getOperand(1), N0);
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
if (N0.getOpcode() == ISD::FNEG &&
N0.getOperand(0).getOpcode() == ISD::FMUL &&
((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
TLI.enableAggressiveFMAFusion(VT))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(ISD::FMA, dl, VT,
DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
DAG.getNode(ISD::FNEG, dl, VT, N1));
} }
// When FP_EXTEND nodes are free on the target, and there is an opportunity // When FP_EXTEND nodes are free on the target, and there is an opportunity
// to combine into FMA, arrange such nodes accordingly. // to combine into FMA, arrange such nodes accordingly.
if (TLI.isFPExtFree(VT)) { if (TLI.isFPExtFree(VT)) {
// fold (fsub (fpext (fmul x, y)), z) // fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z)) // -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) { if (N0.getOpcode() == ISD::FP_EXTEND) {
@ -7308,38 +7404,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
} }
} }
} }
// More folding opportunities when target permits.
if (TLI.enableAggressiveFMAFusion(VT)) {
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
if (N0.getOpcode() == ISD::FMA &&
N0.getOperand(2).getOpcode() == ISD::FMUL)
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FMA, SDLoc(N), VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
DAG.getNode(ISD::FNEG, SDLoc(N), VT,
N1)));
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
if (N1.getOpcode() == ISD::FMA &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
DAG.getNode(ISD::FNEG, SDLoc(N), VT,
N1.getOperand(0)),
N1.getOperand(1),
DAG.getNode(ISD::FMA, SDLoc(N), VT,
DAG.getNode(ISD::FNEG, SDLoc(N), VT,
N20),
N21, N0));
}
}
} }
return SDValue(); return SDValue();

View File

@ -3519,6 +3519,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128)); RTLIB::FMA_PPCF128));
break; break;
case ISD::FMAD:
llvm_unreachable("Illegal fmad should never be formed");
case ISD::FADD: case ISD::FADD:
Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_F80, RTLIB::ADD_F128,

View File

@ -187,6 +187,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FMUL: return "fmul"; case ISD::FMUL: return "fmul";
case ISD::FDIV: return "fdiv"; case ISD::FDIV: return "fdiv";
case ISD::FMA: return "fma"; case ISD::FMA: return "fma";
case ISD::FMAD: return "fmad";
case ISD::FREM: return "frem"; case ISD::FREM: return "frem";
case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FCOPYSIGN: return "fcopysign";
case ISD::FGETSIGN: return "fgetsign"; case ISD::FGETSIGN: return "fgetsign";

View File

@ -765,6 +765,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::CONCAT_VECTORS, VT, Expand); setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
setOperationAction(ISD::FMINNUM, VT, Expand); setOperationAction(ISD::FMINNUM, VT, Expand);
setOperationAction(ISD::FMAXNUM, VT, Expand); setOperationAction(ISD::FMAXNUM, VT, Expand);
setOperationAction(ISD::FMAD, VT, Expand);
// These library functions default to expand. // These library functions default to expand.
setOperationAction(ISD::FROUND, VT, Expand); setOperationAction(ISD::FROUND, VT, Expand);