From 6e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5 Mon Sep 17 00:00:00 2001
From: Niels Ole Salscheider
Date: Sat, 10 Aug 2013 10:38:54 +0000
Subject: [PATCH] R600/SI: FMA is faster than fmul and fadd for f64

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188136 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/SIISelLowering.cpp | 18 +++++++++++++++++
 lib/Target/R600/SIISelLowering.h   |  1 +
 test/CodeGen/R600/fmuladd.ll       | 31 ++++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+)
 create mode 100644 test/CodeGen/R600/fmuladd.ll

diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index b714fc19267..a76e6ee3145 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -338,6 +338,24 @@ MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
   return MVT::i32;
 }
 
+bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  VT = VT.getScalarType();
+
+  if (!VT.isSimple())
+    return false;
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::f32:
+    return false; /* There is V_MAD_F32 for f32 */
+  case MVT::f64:
+    return true;
+  default:
+    break;
+  }
+
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 // Custom DAG Lowering Operations
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index b4202c475d4..effbf1f85de 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -55,6 +55,7 @@ public:
                                               MachineBasicBlock * BB) const;
   virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
   virtual MVT getScalarShiftAmountTy(EVT VT) const;
+  virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
   virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
   virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
diff --git a/test/CodeGen/R600/fmuladd.ll b/test/CodeGen/R600/fmuladd.ll
new file mode 100644
index 00000000000..ac379f44aaa
--- /dev/null
+++ b/test/CodeGen/R600/fmuladd.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+; CHECK: @fmuladd_f32
+; CHECK: V_MAD_F32 {{VGPR[0-9]+, VGPR[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
+
+define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
+                         float addrspace(1)* %in2, float addrspace(1)* %in3) {
+  %r0 = load float addrspace(1)* %in1
+  %r1 = load float addrspace(1)* %in2
+  %r2 = load float addrspace(1)* %in3
+  %r3 = tail call float @llvm.fmuladd.f32(float %r0, float %r1, float %r2)
+  store float %r3, float addrspace(1)* %out
+  ret void
+}
+
+declare float @llvm.fmuladd.f32(float, float, float)
+
+; CHECK: @fmuladd_f64
+; CHECK: V_FMA_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}}
+
+define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                         double addrspace(1)* %in2, double addrspace(1)* %in3) {
+  %r0 = load double addrspace(1)* %in1
+  %r1 = load double addrspace(1)* %in2
+  %r2 = load double addrspace(1)* %in3
+  %r3 = tail call double @llvm.fmuladd.f64(double %r0, double %r1, double %r2)
+  store double %r3, double addrspace(1)* %out
+  ret void
+}
+
+declare double @llvm.fmuladd.f64(double, double, double)