diff --git a/docs/LangRef.html b/docs/LangRef.html index bc72144ff71..3201731e5ae 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -241,6 +241,7 @@
  • 'llvm.pow.*' Intrinsic
  • 'llvm.exp.*' Intrinsic
  • 'llvm.log.*' Intrinsic
  • +
  • 'llvm.fma.*' Intrinsic
  • Bit Manipulation Intrinsics @@ -6570,6 +6571,37 @@ LLVM.

    This function returns the same values as the libm log functions would, and handles error conditions in the same way.

    +

    + 'llvm.fma.*' Intrinsic +

    + +
    + +
    Syntax:
    +

    This is an overloaded intrinsic. You can use llvm.fma on any + floating point or vector of floating point type. Not all targets support all + types however.

    + +
    +  declare float     @llvm.fma.f32(float  %a, float  %b, float  %c)
    +  declare double    @llvm.fma.f64(double %a, double %b, double %c)
    +  declare x86_fp80  @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c)
    +  declare fp128     @llvm.fma.f128(fp128 %a, fp128 %b, fp128 %c)
    +  declare ppc_fp128 @llvm.fma.ppcf128(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c)
    +
    + +
    Overview:
    +

    The 'llvm.fma.*' intrinsics perform the fused multiply-accumulate + operation.

    + +
    Arguments:
    +

    The argument and return value are floating point numbers of the same + type.

    + +
    Semantics:
    +

    This function returns the same values as the libm fma functions + would.

    +
    diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 498614e7fd5..459cecda21e 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -232,7 +232,7 @@ namespace ISD { SMULO, UMULO, // Simple binary floating point operators. - FADD, FSUB, FMUL, FDIV, FREM, + FADD, FSUB, FMUL, FMA, FDIV, FREM, // FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This // DAG node does not require that X and Y have the same type, just that they diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index 44d94779784..4bfd4ab530d 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -103,6 +103,10 @@ namespace RTLIB { REM_F64, REM_F80, REM_PPCF128, + FMA_F32, + FMA_F64, + FMA_F80, + FMA_PPCF128, POWI_F32, POWI_F64, POWI_F80, diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index c854471036d..947cf1be7d4 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -255,6 +255,12 @@ let Properties = [IntrReadMem] in { def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; } +let Properties = [IntrNoMem] in { + def int_fma : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>]>; +} + // NOTE: these are internal interfaces. def int_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>; def int_longjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 4b6d3ef5473..308150fe54a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3351,6 +3351,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, RTLIB::REM_F80, RTLIB::REM_PPCF128)); break; + case ISD::FMA: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_PPCF128)); + break; case ISD::FP16_TO_FP32: Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 27a466b3a92..e6835d87f82 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -74,6 +74,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; @@ -294,6 +295,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { NVT, &Op, 1, false, N->getDebugLoc()); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)), + GetSoftenedFloat(N->getOperand(2)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_PPCF128), + NVT, Ops, 3, false, N->getDebugLoc()); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), @@ -837,6 +851,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break; case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break; case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; + case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break; case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; @@ -989,6 +1004,19 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, GetPairElements(Call, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_PPCF128), + N->getValueType(0), Ops, 3, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 4597ec93356..952797dc75b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -378,6 +378,7 @@ private: SDValue SoftenFloatRes_FLOG(SDNode *N); SDValue SoftenFloatRes_FLOG2(SDNode *N); SDValue SoftenFloatRes_FLOG10(SDNode *N); + SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); SDValue SoftenFloatRes_FNEG(SDNode *N); @@ -442,6 +443,7 @@ private: void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f5e4526b6eb..349b4d5c680 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5878,6 +5878,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FSUB: return "fsub"; case ISD::FMUL: return "fmul"; case ISD::FDIV: return "fdiv"; + case ISD::FMA: return "fma"; case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2afec3328b9..3c8b04fe89f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4651,6 +4651,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::pow: visitPow(I); return 0; + case Intrinsic::fma: + setValue(&I, DAG.getNode(ISD::FMA, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + return 0; case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, MVT::i16, getValue(I.getArgOperand(0)))); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f827f0124ce..4a7afe475e7 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -139,6 +139,10 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::REM_F64] = "fmod"; Names[RTLIB::REM_F80] = "fmodl"; Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::FMA_F32] = "fmaf"; + Names[RTLIB::FMA_F64] = "fma"; + Names[RTLIB::FMA_F80] = "fmal"; + Names[RTLIB::FMA_PPCF128] = "fmal"; Names[RTLIB::POWI_F32] = "__powisf2"; Names[RTLIB::POWI_F64] = "__powidf2"; Names[RTLIB::POWI_F80] = "__powixf2"; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index d9bb3e65fa4..4340a714778 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -708,6 +708,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + // Various VFP goodness if (!UseSoftFloat && !Subtarget->isThumb1Only()) { // int <-> fp are custom expanded into bit_convert + ARMISD ops. diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index daf95551896..de003fb4c65 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -122,6 +122,9 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) setOperationAction(ISD::FPOW , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + setOperationAction(ISD::SETCC, MVT::f32, Promote); setOperationAction(ISD::BITCAST, MVT::f32, Promote); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index f9b50419e7b..81d664f70dc 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -221,6 +221,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index ba2de4044db..62dfdcc2fd1 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -69,6 +69,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) // Floating point operations which are not supported setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i8, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 1a9f3889d28..b4f4b1b4bf0 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -146,6 +146,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FLOG2, MVT::f32, Expand); setOperationAction(ISD::FLOG10, MVT::f32, Expand); setOperationAction(ISD::FEXP, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 979683e6d8c..9741a3902af 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -125,10 +125,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); + setOperationAction(ISD::FMA , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); + setOperationAction(ISD::FMA , MVT::f32, Expand); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index a2bda6c9e84..6f30d3fd6c3 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -754,9 +754,11 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); + setOperationAction(ISD::FMA , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); + setOperationAction(ISD::FMA , MVT::f32, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand); setOperationAction(ISD::CTLZ , MVT::i32, Expand); diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index af85df53b05..871c2972a8c 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -142,6 +142,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); // We have only 64-bit bitconverts setOperationAction(ISD::BITCAST, MVT::f32, Expand); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 709b84c1fa2..f212f244ecf 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -646,6 +646,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS } + // We don't support FMA. + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + // Long double always uses X87. if (!UseSoftFloat) { addRegisterClass(MVT::f80, X86::RFP80RegisterClass); @@ -670,6 +674,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSIN , MVT::f80 , Expand); setOperationAction(ISD::FCOS , MVT::f80 , Expand); } + + setOperationAction(ISD::FMA, MVT::f80, Expand); } // Always use a library call for pow. diff --git a/test/CodeGen/X86/fma.ll b/test/CodeGen/X86/fma.ll new file mode 100644 index 00000000000..5deedb9dd9b --- /dev/null +++ b/test/CodeGen/X86/fma.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s + +; CHECK: test_f32 +; CHECK: _fmaf + +define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp { +entry: + %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone + ret float %call +} + +; CHECK: test_f64 +; CHECK: _fma + +define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp { +entry: + %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone + ret double %call +} + +; CHECK: test_f80 +; CHECK: _fmal + +define x86_fp80 @test_f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) nounwind readnone ssp { +entry: + %call = tail call x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) nounwind readnone + ret x86_fp80 %call +} + +declare float @llvm.fma.f32(float, float, float) nounwind readnone +declare double @llvm.fma.f64(double, double, double) nounwind readnone +declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) nounwind readnone