PTX: Re-work target sm/compute selection and add some basic GPU targets

New GPU targets: g80, gt200, gf100 (fermi)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133799 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Justin Holewinski 2011-06-24 16:27:49 +00:00
parent 40179bf874
commit 35f4fb34ff
6 changed files with 119 additions and 45 deletions

View File

@ -30,31 +30,51 @@ def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false",
//===- PTX Version --------------------------------------------------------===//
def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
"Use PTX Language Version 2.0",
[]>;
"Use PTX Language Version 2.0">;
def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
"Use PTX Language Version 2.1",
[FeaturePTX20]>;
"Use PTX Language Version 2.1">;
def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
"Use PTX Language Version 2.2",
[FeaturePTX21]>;
"Use PTX Language Version 2.2">;
def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3",
"Use PTX Language Version 2.3",
[FeaturePTX22]>;
"Use PTX Language Version 2.3">;
//===- PTX Shader Model ---------------------------------------------------===//
//===- PTX Target ---------------------------------------------------------===//
def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0",
"Enable Shader Model 1.0 compliance">;
def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3",
"Enable Shader Model 1.3 compliance",
[FeatureSM10, FeatureDouble]>;
def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0",
"Enable Shader Model 2.0 compliance",
[FeatureSM13]>;
def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0",
"Use Shader Model 1.0">;
def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1",
"Use Shader Model 1.1">;
def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
"Use Shader Model 1.2">;
def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
"Use Shader Model 1.3">;
def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
"Use Shader Model 2.0">;
def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
"Use Shader Model 2.1">;
def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
"Use Shader Model 2.2">;
def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
"Use Shader Model 2.3">;
def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
"PTX_COMPUTE_1_0",
"Use Compute Compatibility 1.0">;
def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget",
"PTX_COMPUTE_1_1",
"Use Compute Compatibility 1.1">;
def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget",
"PTX_COMPUTE_1_2",
"Use Compute Compatibility 1.2">;
def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
"PTX_COMPUTE_1_3",
"Use Compute Compatibility 1.3">;
def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
"PTX_COMPUTE_2_0",
"Use Compute Compatibility 2.0">;
//===----------------------------------------------------------------------===//
// PTX supported processors
@ -65,6 +85,27 @@ class Proc<string Name, list<SubtargetFeature> Features>
def : Proc<"generic", []>;
// Processor definitions for compute/shader models
def : Proc<"compute_10", [FeatureCOMPUTE10]>;
def : Proc<"compute_11", [FeatureCOMPUTE11]>;
def : Proc<"compute_12", [FeatureCOMPUTE12]>;
def : Proc<"compute_13", [FeatureCOMPUTE13]>;
def : Proc<"compute_20", [FeatureCOMPUTE20]>;
def : Proc<"sm_10", [FeatureSM10]>;
def : Proc<"sm_11", [FeatureSM11]>;
def : Proc<"sm_12", [FeatureSM12]>;
def : Proc<"sm_13", [FeatureSM13]>;
def : Proc<"sm_20", [FeatureSM20]>;
def : Proc<"sm_21", [FeatureSM21]>;
def : Proc<"sm_22", [FeatureSM22]>;
def : Proc<"sm_23", [FeatureSM23]>;
// Processor definitions for common GPU architectures
// Each architecture name selects the shader-model feature listed for it;
// gf100 and fermi are two names for the same feature set.
// NOTE(review): gt200 maps to sm_13 but, unlike gf100/fermi, does not list
// FeatureDouble. Before this change FeatureSM13 implied FeatureDouble, so
// confirm that dropping double support for gt200 is intended.
def : Proc<"g80", [FeatureSM10]>;
def : Proc<"gt200", [FeatureSM13]>;
def : Proc<"gf100", [FeatureSM20, FeatureDouble]>;
def : Proc<"fermi", [FeatureSM20, FeatureDouble]>;
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//

View File

@ -462,7 +462,7 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
if (i != b) {
decl += ", ";
}
if (isKernel || ST.getShaderModel() >= PTXSubtarget::PTX_SM_2_0) {
if (isKernel || ST.useParamSpaceForDeviceArgs()) {
decl += ".param .b";
decl += utostr(*i);
decl += " ";

View File

@ -213,7 +213,7 @@ SDValue PTXTargetLowering::
// We do one of two things here:
// IsKernel || target is sm_20/compute_20 or later -> Use param space for arguments
// otherwise (device function on an older target) -> Use registers for arguments
if (MFI->isKernel() || ST.getShaderModel() >= PTXSubtarget::PTX_SM_2_0) {
if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
// We just need to emit the proper LOAD_PARAM ISDs
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {

View File

@ -26,10 +26,10 @@ def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
// Shader Model Support
def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">;
def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">;
def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">;
def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">;
def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">;
def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">;
def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">;
def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">;
// PTX Version Support
def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">;
@ -613,43 +613,43 @@ def FDIVrr32SM13 : InstPTX<(outs RegF32:$d),
(ins RegF32:$a, RegF32:$b),
"div.rn.f32\t$d, $a, $b",
[(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
Requires<[SupportsSM13]>;
Requires<[FDivNeedsRoundingMode]>;
def FDIVri32SM13 : InstPTX<(outs RegF32:$d),
(ins RegF32:$a, f32imm:$b),
"div.rn.f32\t$d, $a, $b",
[(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
Requires<[SupportsSM13]>;
Requires<[FDivNeedsRoundingMode]>;
def FDIVrr32SM10 : InstPTX<(outs RegF32:$d),
(ins RegF32:$a, RegF32:$b),
"div.f32\t$d, $a, $b",
[(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
Requires<[DoesNotSupportSM13]>;
Requires<[FDivNoRoundingMode]>;
def FDIVri32SM10 : InstPTX<(outs RegF32:$d),
(ins RegF32:$a, f32imm:$b),
"div.f32\t$d, $a, $b",
[(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
Requires<[DoesNotSupportSM13]>;
Requires<[FDivNoRoundingMode]>;
def FDIVrr64SM13 : InstPTX<(outs RegF64:$d),
(ins RegF64:$a, RegF64:$b),
"div.rn.f64\t$d, $a, $b",
[(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
Requires<[SupportsSM13]>;
Requires<[FDivNeedsRoundingMode]>;
def FDIVri64SM13 : InstPTX<(outs RegF64:$d),
(ins RegF64:$a, f64imm:$b),
"div.rn.f64\t$d, $a, $b",
[(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
Requires<[SupportsSM13]>;
Requires<[FDivNeedsRoundingMode]>;
def FDIVrr64SM10 : InstPTX<(outs RegF64:$d),
(ins RegF64:$a, RegF64:$b),
"div.f64\t$d, $a, $b",
[(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
Requires<[DoesNotSupportSM13]>;
Requires<[FDivNoRoundingMode]>;
def FDIVri64SM10 : InstPTX<(outs RegF64:$d),
(ins RegF64:$a, f64imm:$b),
"div.f64\t$d, $a, $b",
[(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
Requires<[DoesNotSupportSM13]>;
Requires<[FDivNoRoundingMode]>;
@ -661,8 +661,10 @@ def FDIVri64SM10 : InstPTX<(outs RegF64:$d),
// In the short term, mad is supported on all PTX versions and we use a
// default rounding mode no matter what shader model or PTX version.
// TODO: Allow the rounding mode to be selectable through llc.
defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13, SupportsFMA]>;
defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13, SupportsFMA]>;
defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>,
Requires<[FMadNeedsRoundingMode, SupportsFMA]>;
defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>,
Requires<[FMadNoRoundingMode, SupportsFMA]>;
///===- Floating-Point Intrinsic Instructions -----------------------------===//

View File

@ -18,21 +18,31 @@ using namespace llvm;
PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS,
bool is64Bit)
: PTXShaderModel(PTX_SM_1_0),
: PTXTarget(PTX_COMPUTE_1_0),
PTXVersion(PTX_VERSION_2_0),
SupportsDouble(false),
SupportsFMA(true),
Is64Bit(is64Bit) {
Is64Bit(is64Bit) {
std::string TARGET = "generic";
ParseSubtargetFeatures(FS, TARGET);
}
/// Returns the name of the selected target in "sm_XY" or "compute_XY" form,
/// for example "sm_20" or "compute_13". Any value without a case below
/// (including the PTX_LAST_* range markers) reaches llvm_unreachable.
std::string PTXSubtarget::getTargetString() const {
switch(PTXShaderModel) {
default: llvm_unreachable("Unknown shader model");
switch(PTXTarget) {
default: llvm_unreachable("Unknown PTX target");
case PTX_SM_1_0: return "sm_10";
case PTX_SM_1_1: return "sm_11";
case PTX_SM_1_2: return "sm_12";
case PTX_SM_1_3: return "sm_13";
case PTX_SM_2_0: return "sm_20";
case PTX_SM_2_1: return "sm_21";
case PTX_SM_2_2: return "sm_22";
case PTX_SM_2_3: return "sm_23";
case PTX_COMPUTE_1_0: return "compute_10";
case PTX_COMPUTE_1_1: return "compute_11";
case PTX_COMPUTE_1_2: return "compute_12";
case PTX_COMPUTE_1_3: return "compute_13";
case PTX_COMPUTE_2_0: return "compute_20";
}
}

View File

@ -23,10 +23,23 @@ namespace llvm {
/**
* Enumeration of code-generation targets supported by the back-end:
* compute capabilities (compute_XY) first, then shader models (sm_XY).
* The enumerator order is significant: the subtarget predicates test
* targets with range comparisons bounded by PTX_LAST_COMPUTE / PTX_LAST_SM.
*/
enum PTXShaderModelEnum {
enum PTXTargetEnum {
PTX_COMPUTE_1_0, /**< Compute Compatibility 1.0 */
PTX_COMPUTE_1_1, /**< Compute Compatibility 1.1 */
PTX_COMPUTE_1_2, /**< Compute Compatibility 1.2 */
PTX_COMPUTE_1_3, /**< Compute Compatibility 1.3 */
PTX_COMPUTE_2_0, /**< Compute Compatibility 2.0 */
PTX_LAST_COMPUTE, /**< Exclusive upper bound of the compute range */
PTX_SM_1_0, /**< Shader Model 1.0 */
PTX_SM_1_1, /**< Shader Model 1.1 */
PTX_SM_1_2, /**< Shader Model 1.2 */
PTX_SM_1_3, /**< Shader Model 1.3 */
PTX_SM_2_0 /**< Shader Model 2.0 */
PTX_SM_2_0, /**< Shader Model 2.0 */
PTX_SM_2_1, /**< Shader Model 2.1 */
PTX_SM_2_2, /**< Shader Model 2.2 */
PTX_SM_2_3, /**< Shader Model 2.3 */
PTX_LAST_SM /**< Exclusive upper bound of the shader-model range */
};
/**
@ -44,7 +57,7 @@ namespace llvm {
private:
/// Shader model or compute capability selected as the code-generation target.
PTXShaderModelEnum PTXShaderModel;
PTXTargetEnum PTXTarget;
/// PTX Language Version.
PTXVersionEnum PTXVersion;
@ -74,18 +87,26 @@ namespace llvm {
bool supportsFMA() const { return SupportsFMA; }
bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; }
PTXShaderModelEnum getShaderModel() const { return PTXShaderModel; }
/// Whether floating-point division must be emitted with an explicit rounding
/// mode. True for shader models sm_13 and later and for compute targets
/// compute_13 and later; false for every other target.
bool fdivNeedsRoundingMode() const {
  const bool IsSM13OrLater =
      PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM;
  const bool IsCompute13OrLater =
      PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE;
  return IsSM13OrLater || IsCompute13OrLater;
}
/// Whether the multiply-add instruction must be emitted with an explicit
/// rounding mode. True for sm_13-or-later shader models and for
/// compute_13-or-later compute targets.
bool fmadNeedsRoundingMode() const {
  // Shader-model range: [PTX_SM_1_3, PTX_LAST_SM).
  if (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM)
    return true;
  // Compute range: [PTX_COMPUTE_1_3, PTX_LAST_COMPUTE).
  return PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE;
}
/// Whether arguments of device (non-kernel) functions are passed through the
/// param space. True for sm_20-or-later shader models and for the compute_20
/// target; older targets yield false.
bool useParamSpaceForDeviceArgs() const {
  const bool InSMRange = PTX_SM_2_0 <= PTXTarget && PTXTarget < PTX_LAST_SM;
  if (InSMRange)
    return true;
  const bool InComputeRange =
      PTX_COMPUTE_2_0 <= PTXTarget && PTXTarget < PTX_LAST_COMPUTE;
  return InComputeRange;
}
std::string ParseSubtargetFeatures(const std::string &FS,
const std::string &CPU);