PTX: Add preliminary support for floating-point divide and multiply-and-add

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127410 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Justin Holewinski 2011-03-10 16:57:18 +00:00
parent 7deb187736
commit fca9efcbc4
5 changed files with 151 additions and 5 deletions

View File

@ -21,9 +21,22 @@ include "PTXInstrFormats.td"
// Code Generation Predicates
//===----------------------------------------------------------------------===//
// Addressing
// The two predicates form a complementary pair: the first condition string is
// the second one prefixed with '!'.
def Use32BitAddresses : Predicate<"!getSubtarget().use64BitAddresses()">;
def Use64BitAddresses : Predicate<"getSubtarget().use64BitAddresses()">;
// Shader Model Support
// Each Supports*/DoesNotSupport* pair below is a condition and its negation.
// Instruction definitions select on them through Requires<[...]>.
// PTXSubtarget::supportsSM13()/supportsSM20() are ">=" comparisons, so a
// Supports* predicate also holds for any newer shader model.
def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">;
def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">;
def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">;
def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">;
// PTX Version Support
// Same scheme as above, keyed on the PTX version instead of the shader model
// (PTXSubtarget::supportsPTX20()/supportsPTX21() are also ">=" comparisons).
def SupportsPTX20 : Predicate<"getSubtarget().supportsPTX20()">;
def DoesNotSupportPTX20 : Predicate<"!getSubtarget().supportsPTX20()">;
def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">;
def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
//===----------------------------------------------------------------------===//
// Instruction Pattern Stuff
//===----------------------------------------------------------------------===//
@ -165,8 +178,8 @@ def PTXret
// Instruction Class Templates
//===----------------------------------------------------------------------===//
// Three-operand floating-point instruction template
multiclass FLOAT3<string opcstr, SDNode opnode> {
//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> {
def rr32 : InstPTX<(outs RRegf32:$d),
(ins RRegf32:$a, RRegf32:$b),
!strconcat(opcstr, ".f32\t$d, $a, $b"),
@ -185,6 +198,34 @@ multiclass FLOAT3<string opcstr, SDNode opnode> {
[(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>;
}
//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
// Template for instructions that match two chained floating-point nodes:
//   $d = opnode2(opnode1($a, $b), $c)
// Parameters:
//   opcstr  - mnemonic prefix; ".f32" or ".f64" and the operand list
//             "\t$d, $a, $b, $c" are appended with !strconcat.
//   opnode1 - inner node, applied to $a and $b.
//   opnode2 - outer node, applied to the inner result and $c.
// Expands to four records. The rrr* forms take $c from a register; the rri*
// forms take $c as a floating-point immediate. $a and $b are always registers.
multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> {
// f32: all three sources in registers.
def rrr32 : InstPTX<(outs RRegf32:$d),
(ins RRegf32:$a, RRegf32:$b, RRegf32:$c),
!strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
[(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
RRegf32:$b),
RRegf32:$c))]>;
// f32: $c is an immediate (f32imm operand, matched with fpimm).
def rri32 : InstPTX<(outs RRegf32:$d),
(ins RRegf32:$a, RRegf32:$b, f32imm:$c),
!strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
[(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
RRegf32:$b),
fpimm:$c))]>;
// f64: all three sources in registers.
def rrr64 : InstPTX<(outs RRegf64:$d),
(ins RRegf64:$a, RRegf64:$b, RRegf64:$c),
!strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
[(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
RRegf64:$b),
RRegf64:$c))]>;
// f64: $c is an immediate (f64imm operand, matched with fpimm).
def rri64 : InstPTX<(outs RRegf64:$d),
(ins RRegf64:$a, RRegf64:$b, f64imm:$c),
!strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
[(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
RRegf64:$b),
fpimm:$c))]>;
}
multiclass INT3<string opcstr, SDNode opnode> {
def rr16 : InstPTX<(outs RRegu16:$d),
(ins RRegu16:$a, RRegu16:$b),
@ -304,9 +345,59 @@ multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
///===- Floating-Point Arithmetic Instructions ----------------------------===//
defm FADD : FLOAT3<"add", fadd>;
defm FSUB : FLOAT3<"sub", fsub>;
defm FMUL : FLOAT3<"mul", fmul>;
// Standard Binary Operations
// Each defm expands the PTX_FLOAT_3OP multiclass with the given mnemonic
// prefix and the selection DAG node it should match.
defm FADD : PTX_FLOAT_3OP<"add", fadd>;
defm FSUB : PTX_FLOAT_3OP<"sub", fsub>;
defm FMUL : PTX_FLOAT_3OP<"mul", fmul>;
// TODO: Allow user selection of rounding modes for fdiv.
// For division, we need to handle f32 and f64 differently.
// For f32, we just always use .approx since it is supported on all hardware
// for PTX 1.4+, which is our minimum target.
// f32 divide, register / register. No Requires<> clause is attached.
def FDIVrr32 : InstPTX<(outs RRegf32:$d),
(ins RRegf32:$a, RRegf32:$b),
"div.approx.f32\t$d, $a, $b",
[(set RRegf32:$d, (fdiv RRegf32:$a, RRegf32:$b))]>;
// f32 divide, register / immediate.
def FDIVri32 : InstPTX<(outs RRegf32:$d),
(ins RRegf32:$a, f32imm:$b),
"div.approx.f32\t$d, $a, $b",
[(set RRegf32:$d, (fdiv RRegf32:$a, fpimm:$b))]>;
// For f64, we must specify a rounding for sm 1.3+ but *not* for sm 1.0.
// The SM13 and SM10 records below match the same fdiv patterns and differ only
// in the assembly string and in their Requires<> predicate. SupportsSM13 and
// DoesNotSupportSM13 are negations of each other, so at most one record of
// each pair applies to a given subtarget.
// f64 divide with explicit .rn rounding, register / register.
def FDIVrr64SM13 : InstPTX<(outs RRegf64:$d),
(ins RRegf64:$a, RRegf64:$b),
"div.rn.f64\t$d, $a, $b",
[(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
Requires<[SupportsSM13]>;
// f64 divide with explicit .rn rounding, register / immediate.
def FDIVri64SM13 : InstPTX<(outs RRegf64:$d),
(ins RRegf64:$a, f64imm:$b),
"div.rn.f64\t$d, $a, $b",
[(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
Requires<[SupportsSM13]>;
// f64 divide without a rounding modifier, register / register.
def FDIVrr64SM10 : InstPTX<(outs RRegf64:$d),
(ins RRegf64:$a, RRegf64:$b),
"div.f64\t$d, $a, $b",
[(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
Requires<[DoesNotSupportSM13]>;
// f64 divide without a rounding modifier, register / immediate.
def FDIVri64SM10 : InstPTX<(outs RRegf64:$d),
(ins RRegf64:$a, f64imm:$b),
"div.f64\t$d, $a, $b",
[(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
Requires<[DoesNotSupportSM13]>;
// Multi-operation hybrid instructions
// The selection of mad/fma is tricky. In some cases, they are the *same*
// instruction, but in other cases we may prefer one or the other. Also,
// different PTX versions differ on whether rounding mode flags are required.
// In the short term, mad is supported on all PTX versions and we use a
// default rounding mode no matter what shader model or PTX version.
// TODO: Allow the rounding mode to be selectable through llc.
// Expands PTX_FLOAT_4OP with fmul as the inner node and fadd as the outer
// node, i.e. the patterns match (fadd (fmul $a, $b), $c).
// NOTE(review): no Requires<> clause is attached, so "mad.rn" is emitted for
// every subtarget, unlike the f64 fdiv records which split on SupportsSM13.
// Confirm that the .rn modifier is accepted on all targeted shader models.
defm FMAD : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>;
///===- Integer Arithmetic Instructions -----------------------------------===//

View File

@ -54,6 +54,14 @@ namespace llvm {
/// Returns the value of the Use64BitAddresses member.
bool use64BitAddresses() const { return Use64BitAddresses; }
/// True when the selected shader model is PTX_SM_1_3 or compares greater.
bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
/// True when the selected shader model is PTX_SM_2_0 or compares greater.
bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
/// True when the selected PTX version is PTX_VERSION_2_0 or compares greater.
bool supportsPTX20() const { return PTXVersion >= PTX_VERSION_2_0; }
/// True when the selected PTX version is PTX_VERSION_2_1 or compares greater.
bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
std::string ParseSubtargetFeatures(const std::string &FS,
const std::string &CPU);
}; // class PTXSubtarget

View File

@ -0,0 +1,15 @@
; RUN: llc < %s -march=ptx -mattr=+sm10 | FileCheck %s
; Floating-point division with the sm10 subtarget feature enabled.
; Expected output: f32 division uses div.approx.f32, and f64 division uses
; div.f64 with no rounding modifier.
; f32 case: a single fdiv on the two float arguments.
define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: div.approx.f32 f0, f1, f2;
; CHECK-NEXT: ret;
%a = fdiv float %x, %y
ret float %a
}
; f64 case: a single fdiv on the two double arguments.
define ptx_device double @t1_f64(double %x, double %y) {
; CHECK: div.f64 fd0, fd1, fd2;
; CHECK-NEXT: ret;
%a = fdiv double %x, %y
ret double %a
}

View File

@ -0,0 +1,15 @@
; RUN: llc < %s -march=ptx -mattr=+sm13 | FileCheck %s
; Floating-point division with the sm13 subtarget feature enabled.
; Expected output: f32 division uses div.approx.f32, and f64 division uses
; div.rn.f64 (explicit rounding modifier).
; f32 case: a single fdiv on the two float arguments.
define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: div.approx.f32 f0, f1, f2;
; CHECK-NEXT: ret;
%a = fdiv float %x, %y
ret float %a
}
; f64 case: a single fdiv on the two double arguments.
define ptx_device double @t1_f64(double %x, double %y) {
; CHECK: div.rn.f64 fd0, fd1, fd2;
; CHECK-NEXT: ret;
%a = fdiv double %x, %y
ret double %a
}

17
test/CodeGen/PTX/mad.ll Normal file
View File

@ -0,0 +1,17 @@
; RUN: llc < %s -march=ptx | FileCheck %s
; Multiply-and-add selection. No -mattr option is passed, so the backend's
; default subtarget features apply.
; Expected output: an fmul whose result feeds an fadd becomes one mad.rn
; instruction, followed directly by ret.
; f32 case: (%x * %y) + %z.
define ptx_device float @t1_f32(float %x, float %y, float %z) {
; CHECK: mad.rn.f32 f0, f1, f2, f3;
; CHECK-NEXT: ret;
%a = fmul float %x, %y
%b = fadd float %a, %z
ret float %b
}
; f64 case: (%x * %y) + %z.
define ptx_device double @t1_f64(double %x, double %y, double %z) {
; CHECK: mad.rn.f64 fd0, fd1, fd2, fd3;
; CHECK-NEXT: ret;
%a = fmul double %x, %y
%b = fadd double %a, %z
ret double %b
}