PTX: add flag to disable mad/fma selection

Patch by Dan Bailey git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@131537 91177308-0d34-0410-b5e6-96231b3b80d8
2025-07-24 06:25:18 +00:00 · 2011-05-18 15:42:23 +00:00
parent d6dde76090
commit 657d1bed23
5 changed files with 35 additions and 4 deletions
--- a/lib/Target/PTX/PTX.td
+++ b/lib/Target/PTX/PTX.td
@@ -24,6 +24,9 @@ include "llvm/Target/Target.td"
 def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
                                     "Do not demote .f64 to .f32">;
 // Subtarget feature "no-fma": sets the SupportsFMA subtarget field to false,
 // which turns off the SupportsFMA predicate that the mad patterns require.
 def FeatureNoFMA  : SubtargetFeature<"no-fma","SupportsFMA", "false",
                                     "Disable Fused-Multiply Add">;
 //===- PTX Version --------------------------------------------------------===//
 def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -39,6 +39,10 @@ def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">;
 def SupportsPTX23       : Predicate<"getSubtarget().supportsPTX23()">;
 def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">;
 // Fused-Multiply Add: predicates that query supportsFMA() on the subtarget.
 // Instruction patterns list them in Requires<[...]> to be selected only when
 // FMA/MAD selection is enabled (or, for the negated form, disabled).
 def SupportsFMA         : Predicate<"getSubtarget().supportsFMA()">;
 def DoesNotSupportFMA   : Predicate<"!getSubtarget().supportsFMA()">;
 //===----------------------------------------------------------------------===//
 // Instruction Pattern Stuff
 //===----------------------------------------------------------------------===//
@@ -629,8 +633,8 @@ def FDIVri64SM10 : InstPTX<(outs RRegf64:$d),
 // In the short term, mad is supported on all PTX versions and we use a
 // default rounding mode no matter what shader model or PTX version.
 // TODO: Allow the rounding mode to be selectable through llc.
-defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13]>;
+// Both mad variants also require SupportsFMA so that the "no-fma" subtarget
+// feature can turn them off.
+defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13, SupportsFMA]>;
-defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13]>;
+defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13, SupportsFMA]>;
 ///===- Floating-Point Intrinsic Instructions -----------------------------===//
@@ -667,6 +671,8 @@ def FCOS64 : InstPTX<(outs RRegf64:$d),
 ///===- Comparison and Selection Instructions -----------------------------===//
 // .setp
 // Compare u16
 defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ,  "eq">;
--- a/lib/Target/PTX/PTXSubtarget.cpp
+++ b/lib/Target/PTX/PTXSubtarget.cpp
@@ -21,6 +21,7 @@ PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS,
  : PTXShaderModel(PTX_SM_1_0),
    PTXVersion(PTX_VERSION_2_0),
    SupportsDouble(false),
    SupportsFMA(true),
    Is64Bit(is64Bit) {	
  std::string TARGET = "generic";
  ParseSubtargetFeatures(FS, TARGET);
--- a/lib/Target/PTX/PTXSubtarget.h
+++ b/lib/Target/PTX/PTXSubtarget.h
@@ -50,6 +50,9 @@ namespace llvm {
      // The native .f64 type is supported on the hardware.
      bool SupportsDouble;
      // Support the fused-multiply add (FMA) and multiply-add (MAD)
      // instructions. Initialized to true by the constructor; the "no-fma"
      // subtarget feature sets it to false.
      bool SupportsFMA;
      // Use .u64 instead of .u32 for addresses.
      bool Is64Bit;
@@ -64,6 +67,8 @@ namespace llvm {
      bool is64Bit() const { return Is64Bit; }
      // Accessor for SupportsFMA; this is what the SupportsFMA and
      // DoesNotSupportFMA instruction-selection predicates call.
      bool supportsFMA() const { return SupportsFMA; }
      bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
      bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
--- a/test/CodeGen/PTX/mad-disabling.ll
+++ b/test/CodeGen/PTX/mad-disabling.ll
@@ -0,0 +1,16 @@
 ; Check that mad is selected by default and that no mad instruction is
 ; emitted once FMA selection is disabled with +no-fma.
 ; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | grep "mad"
 ; Note: `grep -v "mad"` would exit 0 as soon as any output line lacks "mad",
 ; so it could never fail; `not grep` fails the test if "mad" appears at all.
 ; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | not grep "mad"
 ; f32 case: the product of %x and %y is added to %z and returned.
 define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
 entry:
  %prod = fmul float %x, %y
  %sum = fadd float %prod, %z
  ret float %sum
 }
 ; f64 case: the product of %x and %y is added to %z and returned.
 define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
 entry:
  %prod = fmul double %x, %y
  %sum = fadd double %prod, %z
  ret double %sum
 }