diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index 1df4448abf0..be9a149d78b 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -48,6 +48,12 @@ def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals", "Enable double precision denormal handling", [FeatureFP64]>; +def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf", + "FastFMAF32", + "true", + "Assuming f32 fma is at least as fast as mul + add", + []>; + // Some instructions do not support denormals despite this flag. Using // fp32 denormals also causes instructions to run at the double // precision rate for the device. diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index 541dbab709d..e08a7dd2689 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -65,7 +65,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS, : AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false), DumpCode(false), R600ALUInst(false), HasVertexCache(false), TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false), - FP64Denormals(false), FP32Denormals(false), CaymanISA(false), + FP64Denormals(false), FP32Denormals(false), + FastFMAF32(false), CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 389cc8caf27..0c0145cb2b3 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -55,6 +55,7 @@ private: bool FP64; bool FP64Denormals; bool FP32Denormals; + bool FastFMAF32; bool CaymanISA; bool FlatAddressSpace; bool EnableIRStructurizer; @@ -127,6 +128,10 @@ public: return FP64Denormals; } + bool hasFastFMAF32() const { + return FastFMAF32; + } + bool hasFlatAddressSpace() const { return FlatAddressSpace; } diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index 659301651c1..fb5aa6135cf 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -83,9 +83,13 @@ def : Proc<"cayman", R600_VLIW4_Itin, // Southern Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"SI", SIFullSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"SI", SIFullSpeedModel, + [FeatureSouthernIslands, FeatureFastFMAF32] +>; -def : ProcessorModel<"tahiti", SIFullSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"tahiti", SIFullSpeedModel, + [FeatureSouthernIslands, FeatureFastFMAF32] +>; def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>; @@ -105,7 +109,9 @@ def : ProcessorModel<"kabini", SIQuarterSpeedModel, [FeatureSeaIslands]>; def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureSeaIslands]>; -def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureSeaIslands]>; +def : ProcessorModel<"hawaii", SIFullSpeedModel, + [FeatureSeaIslands, FeatureFastFMAF32] +>; def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 6b2ea0682a4..12677f0e062 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -655,7 +655,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { switch (VT.getSimpleVT().SimpleTy) { case MVT::f32: - return false; /* There is V_MAD_F32 for f32 */ + return Subtarget->hasFastFMAF32(); case MVT::f64: return true; default: