mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-07-24 06:25:18 +00:00
PTX: add flag to disable mad/fma selection
Patch by Dan Bailey.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@131537 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -24,6 +24,9 @@ include "llvm/Target/Target.td"
|
|||||||
def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
|
def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
|
||||||
"Do not demote .f64 to .f32">;
|
"Do not demote .f64 to .f32">;
|
||||||
|
|
||||||
|
def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false",
|
||||||
|
"Disable Fused-Multiply Add">;
|
||||||
|
|
||||||
//===- PTX Version --------------------------------------------------------===//
|
//===- PTX Version --------------------------------------------------------===//
|
||||||
|
|
||||||
def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
|
def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
|
||||||
|
@@ -39,6 +39,10 @@ def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">;
|
|||||||
def SupportsPTX23 : Predicate<"getSubtarget().supportsPTX23()">;
|
def SupportsPTX23 : Predicate<"getSubtarget().supportsPTX23()">;
|
||||||
def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">;
|
def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">;
|
||||||
|
|
||||||
|
// Fused-Multiply Add
|
||||||
|
def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">;
|
||||||
|
def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Instruction Pattern Stuff
|
// Instruction Pattern Stuff
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@@ -629,8 +633,8 @@ def FDIVri64SM10 : InstPTX<(outs RRegf64:$d),
|
|||||||
// In the short term, mad is supported on all PTX versions and we use a
|
// In the short term, mad is supported on all PTX versions and we use a
|
||||||
// default rounding mode no matter what shader model or PTX version.
|
// default rounding mode no matter what shader model or PTX version.
|
||||||
// TODO: Allow the rounding mode to be selectable through llc.
|
// TODO: Allow the rounding mode to be selectable through llc.
|
||||||
defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13]>;
|
defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13, SupportsFMA]>;
|
||||||
defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13]>;
|
defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13, SupportsFMA]>;
|
||||||
|
|
||||||
///===- Floating-Point Intrinsic Instructions -----------------------------===//
|
///===- Floating-Point Intrinsic Instructions -----------------------------===//
|
||||||
|
|
||||||
@@ -667,6 +671,8 @@ def FCOS64 : InstPTX<(outs RRegf64:$d),
|
|||||||
|
|
||||||
///===- Comparison and Selection Instructions -----------------------------===//
|
///===- Comparison and Selection Instructions -----------------------------===//
|
||||||
|
|
||||||
|
// .setp
|
||||||
|
|
||||||
// Compare u16
|
// Compare u16
|
||||||
|
|
||||||
defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ, "eq">;
|
defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ, "eq">;
|
||||||
|
@@ -21,6 +21,7 @@ PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS,
|
|||||||
: PTXShaderModel(PTX_SM_1_0),
|
: PTXShaderModel(PTX_SM_1_0),
|
||||||
PTXVersion(PTX_VERSION_2_0),
|
PTXVersion(PTX_VERSION_2_0),
|
||||||
SupportsDouble(false),
|
SupportsDouble(false),
|
||||||
|
SupportsFMA(true),
|
||||||
Is64Bit(is64Bit) {
|
Is64Bit(is64Bit) {
|
||||||
std::string TARGET = "generic";
|
std::string TARGET = "generic";
|
||||||
ParseSubtargetFeatures(FS, TARGET);
|
ParseSubtargetFeatures(FS, TARGET);
|
||||||
|
@@ -50,6 +50,9 @@ namespace llvm {
|
|||||||
// The native .f64 type is supported on the hardware.
|
// The native .f64 type is supported on the hardware.
|
||||||
bool SupportsDouble;
|
bool SupportsDouble;
|
||||||
|
|
||||||
|
// Allow selection of the fused multiply-add (FMA) and multiply-add (MAD)
// instructions; set to false by the "no-fma" subtarget feature.
|
||||||
|
bool SupportsFMA;
|
||||||
|
|
||||||
// Use .u64 instead of .u32 for addresses.
|
// Use .u64 instead of .u32 for addresses.
|
||||||
bool Is64Bit;
|
bool Is64Bit;
|
||||||
|
|
||||||
@@ -64,6 +67,8 @@ namespace llvm {
|
|||||||
|
|
||||||
bool is64Bit() const { return Is64Bit; }
|
bool is64Bit() const { return Is64Bit; }
|
||||||
|
|
||||||
|
// Returns the SupportsFMA flag: true by default, set to false by the "no-fma"
// subtarget feature. The mad/mad.rn selection patterns require it.
bool supportsFMA() const { return SupportsFMA; }
|
||||||
|
|
||||||
bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
|
bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
|
||||||
|
|
||||||
bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
|
bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
|
||||||
|
16
test/CodeGen/PTX/mad-disabling.ll
Normal file
16
test/CodeGen/PTX/mad-disabling.ll
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | grep "mad"
|
||||||
|
; With +no-fma the output must contain no "mad" at all. `grep -v` would exit 0
; as soon as any single line lacks "mad", so the check is inverted with `not`.
; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | not grep "mad"
|
||||||
|
|
||||||
|
define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
|
||||||
|
entry:
|
||||||
|
%a = fmul float %x, %y
|
||||||
|
%b = fadd float %a, %z
|
||||||
|
ret float %b
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
|
||||||
|
entry:
|
||||||
|
%a = fmul double %x, %y
|
||||||
|
%b = fadd double %a, %z
|
||||||
|
ret double %b
|
||||||
|
}
|
Reference in New Issue
Block a user