From 35f4fb34ff60b8f23b2c9691b312bc67cac95eb4 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 24 Jun 2011 16:27:49 +0000 Subject: [PATCH] PTX: Re-work target sm/compute selection and add some basic GPU targets: g80, gt200, gf100(fermi) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133799 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PTX/PTX.td | 75 +++++++++++++++++++++++------- lib/Target/PTX/PTXAsmPrinter.cpp | 2 +- lib/Target/PTX/PTXISelLowering.cpp | 2 +- lib/Target/PTX/PTXInstrInfo.td | 30 ++++++------ lib/Target/PTX/PTXSubtarget.cpp | 18 +++++-- lib/Target/PTX/PTXSubtarget.h | 37 +++++++++++---- 6 files changed, 119 insertions(+), 45 deletions(-) diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td index 6a36b24d800..f6fbe9fffc6 100644 --- a/lib/Target/PTX/PTX.td +++ b/lib/Target/PTX/PTX.td @@ -30,31 +30,51 @@ def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false", //===- PTX Version --------------------------------------------------------===// def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0", - "Use PTX Language Version 2.0", - []>; + "Use PTX Language Version 2.0">; def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1", - "Use PTX Language Version 2.1", - [FeaturePTX20]>; + "Use PTX Language Version 2.1">; def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2", - "Use PTX Language Version 2.2", - [FeaturePTX21]>; + "Use PTX Language Version 2.2">; def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3", - "Use PTX Language Version 2.3", - [FeaturePTX22]>; + "Use PTX Language Version 2.3">; -//===- PTX Shader Model ---------------------------------------------------===// +//===- PTX Target ---------------------------------------------------------===// -def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0", - "Enable Shader Model 1.0 compliance">; -def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3", - "Enable Shader Model 1.3 compliance", - [FeatureSM10, FeatureDouble]>; -def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0", - "Enable Shader Model 2.0 compliance", - [FeatureSM13]>; +def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0", + "Use Shader Model 1.0">; +def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1", + "Use Shader Model 1.1">; +def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2", + "Use Shader Model 1.2">; +def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3", + "Use Shader Model 1.3">; +def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0", + "Use Shader Model 2.0">; +def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1", + "Use Shader Model 2.1">; +def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2", + "Use Shader Model 2.2">; +def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3", + "Use Shader Model 2.3">; + +def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget", + "PTX_COMPUTE_1_0", + "Use Compute Compatibility 1.0">; +def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget", + "PTX_COMPUTE_1_1", + "Use Compute Compatibility 1.1">; +def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget", + "PTX_COMPUTE_1_2", + "Use Compute Compatibility 1.2">; +def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget", + "PTX_COMPUTE_1_3", + "Use Compute Compatibility 1.3">; +def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget", + "PTX_COMPUTE_2_0", + "Use Compute Compatibility 2.0">; //===----------------------------------------------------------------------===// // PTX supported processors @@ -65,6 +85,27 @@ class Proc Features> def : Proc<"generic", []>; +// Processor definitions for compute/shader models +def : Proc<"compute_10", [FeatureCOMPUTE10]>; +def : Proc<"compute_11", [FeatureCOMPUTE11]>; +def : Proc<"compute_12", [FeatureCOMPUTE12]>; +def : Proc<"compute_13", [FeatureCOMPUTE13]>; +def : Proc<"compute_20", [FeatureCOMPUTE20]>; +def : Proc<"sm_10", [FeatureSM10]>; +def : Proc<"sm_11", [FeatureSM11]>; +def : Proc<"sm_12", [FeatureSM12]>; +def : Proc<"sm_13", [FeatureSM13]>; +def : Proc<"sm_20", [FeatureSM20]>; +def : Proc<"sm_21", [FeatureSM21]>; +def : Proc<"sm_22", [FeatureSM22]>; +def : Proc<"sm_23", [FeatureSM23]>; + +// Processor definitions for common GPU architectures +def : Proc<"g80", [FeatureSM10]>; +def : Proc<"gt200", [FeatureSM13]>; +def : Proc<"gf100", [FeatureSM20, FeatureDouble]>; +def : Proc<"fermi", [FeatureSM20, FeatureDouble]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index 5d7e4c3ff79..23268d6931c 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -462,7 +462,7 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { if (i != b) { decl += ", "; } - if (isKernel || ST.getShaderModel() >= PTXSubtarget::PTX_SM_2_0) { + if (isKernel || ST.useParamSpaceForDeviceArgs()) { decl += ".param .b"; decl += utostr(*i); decl += " "; diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index c82149301a0..6b7954d4e9d 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -213,7 +213,7 @@ SDValue PTXTargetLowering:: // We do one of two things here: // IsKernel || SM >= 2.0 -> Use param space for arguments // SM < 2.0 -> Use registers for arguments - if (MFI->isKernel() || ST.getShaderModel() >= PTXSubtarget::PTX_SM_2_0) { + if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) { // We just need to emit the proper LOAD_PARAM ISDs for (unsigned i = 0, e = Ins.size(); i != e; ++i) { diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index 1c18c4aa33e..a6c03e54ae6 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -26,10 +26,10 @@ def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">; def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; // Shader Model Support -def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">; -def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">; -def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">; -def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">; +def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">; +def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">; +def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">; +def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">; // PTX Version Support def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">; @@ -613,43 +613,43 @@ def FDIVrr32SM13 : InstPTX<(outs RegF32:$d), (ins RegF32:$a, RegF32:$b), "div.rn.f32\t$d, $a, $b", [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, - Requires<[SupportsSM13]>; + Requires<[FDivNeedsRoundingMode]>; def FDIVri32SM13 : InstPTX<(outs RegF32:$d), (ins RegF32:$a, f32imm:$b), "div.rn.f32\t$d, $a, $b", [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, - Requires<[SupportsSM13]>; + Requires<[FDivNeedsRoundingMode]>; def FDIVrr32SM10 : InstPTX<(outs RegF32:$d), (ins RegF32:$a, RegF32:$b), "div.f32\t$d, $a, $b", [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, - Requires<[DoesNotSupportSM13]>; + Requires<[FDivNoRoundingMode]>; def FDIVri32SM10 : InstPTX<(outs RegF32:$d), (ins RegF32:$a, f32imm:$b), "div.f32\t$d, $a, $b", [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, - Requires<[DoesNotSupportSM13]>; + Requires<[FDivNoRoundingMode]>; def FDIVrr64SM13 : InstPTX<(outs RegF64:$d), (ins RegF64:$a, RegF64:$b), "div.rn.f64\t$d, $a, $b", [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, - Requires<[SupportsSM13]>; + Requires<[FDivNeedsRoundingMode]>; def FDIVri64SM13 : InstPTX<(outs RegF64:$d), (ins RegF64:$a, f64imm:$b), "div.rn.f64\t$d, $a, $b", [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, - Requires<[SupportsSM13]>; + Requires<[FDivNeedsRoundingMode]>; def FDIVrr64SM10 : InstPTX<(outs RegF64:$d), (ins RegF64:$a, RegF64:$b), "div.f64\t$d, $a, $b", [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, - Requires<[DoesNotSupportSM13]>; + Requires<[FDivNoRoundingMode]>; def FDIVri64SM10 : InstPTX<(outs RegF64:$d), (ins RegF64:$a, f64imm:$b), "div.f64\t$d, $a, $b", [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, - Requires<[DoesNotSupportSM13]>; + Requires<[FDivNoRoundingMode]>; @@ -661,8 +661,10 @@ def FDIVri64SM10 : InstPTX<(outs RegF64:$d), // In the short term, mad is supported on all PTX versions and we use a // default rounding mode no matter what shader model or PTX version. // TODO: Allow the rounding mode to be selectable through llc. -defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13, SupportsFMA]>; -defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13, SupportsFMA]>; +defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, + Requires<[FMadNeedsRoundingMode, SupportsFMA]>; +defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, + Requires<[FMadNoRoundingMode, SupportsFMA]>; ///===- Floating-Point Intrinsic Instructions -----------------------------===// diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp index e8a1dfecd00..77e3431f46b 100644 --- a/lib/Target/PTX/PTXSubtarget.cpp +++ b/lib/Target/PTX/PTXSubtarget.cpp @@ -18,21 +18,31 @@ using namespace llvm; PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS, bool is64Bit) - : PTXShaderModel(PTX_SM_1_0), + : PTXTarget(PTX_COMPUTE_1_0), PTXVersion(PTX_VERSION_2_0), SupportsDouble(false), SupportsFMA(true), - Is64Bit(is64Bit) { + Is64Bit(is64Bit) { std::string TARGET = "generic"; ParseSubtargetFeatures(FS, TARGET); } std::string PTXSubtarget::getTargetString() const { - switch(PTXShaderModel) { - default: llvm_unreachable("Unknown shader model"); + switch(PTXTarget) { + default: llvm_unreachable("Unknown PTX target"); case PTX_SM_1_0: return "sm_10"; + case PTX_SM_1_1: return "sm_11"; + case PTX_SM_1_2: return "sm_12"; case PTX_SM_1_3: return "sm_13"; case PTX_SM_2_0: return "sm_20"; + case PTX_SM_2_1: return "sm_21"; + case PTX_SM_2_2: return "sm_22"; + case PTX_SM_2_3: return "sm_23"; + case PTX_COMPUTE_1_0: return "compute_10"; + case PTX_COMPUTE_1_1: return "compute_11"; + case PTX_COMPUTE_1_2: return "compute_12"; + case PTX_COMPUTE_1_3: return "compute_13"; + case PTX_COMPUTE_2_0: return "compute_20"; } } diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h index 2ebe6cfdc83..58d192bd8fe 100644 --- a/lib/Target/PTX/PTXSubtarget.h +++ b/lib/Target/PTX/PTXSubtarget.h @@ -23,10 +23,23 @@ namespace llvm { /** * Enumeration of Shader Models supported by the back-end. */ - enum PTXShaderModelEnum { + enum PTXTargetEnum { + PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */ + PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */ + PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */ + PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */ + PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */ + PTX_LAST_COMPUTE, + PTX_SM_1_0, /*< Shader Model 1.0 */ + PTX_SM_1_1, /*< Shader Model 1.1 */ + PTX_SM_1_2, /*< Shader Model 1.2 */ PTX_SM_1_3, /*< Shader Model 1.3 */ - PTX_SM_2_0 /*< Shader Model 2.0 */ + PTX_SM_2_0, /*< Shader Model 2.0 */ + PTX_SM_2_1, /*< Shader Model 2.1 */ + PTX_SM_2_2, /*< Shader Model 2.2 */ + PTX_SM_2_3, /*< Shader Model 2.3 */ + PTX_LAST_SM }; /** @@ -44,7 +57,7 @@ namespace llvm { private: /// Shader Model supported on the target GPU. - PTXShaderModelEnum PTXShaderModel; + PTXTargetEnum PTXTarget; /// PTX Language Version. PTXVersionEnum PTXVersion; @@ -74,18 +87,26 @@ namespace llvm { bool supportsFMA() const { return SupportsFMA; } - bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; } - - bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; } - bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; } bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; } bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; } - PTXShaderModelEnum getShaderModel() const { return PTXShaderModel; } + bool fdivNeedsRoundingMode() const { + return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE); + } + bool fmadNeedsRoundingMode() const { + return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE); + } + + bool useParamSpaceForDeviceArgs() const { + return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); + } std::string ParseSubtargetFeatures(const std::string &FS, const std::string &CPU);