mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-15 20:06:46 +00:00
PTX: Add preliminary support for floating-point divide and multiply-and-add
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127410 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7deb187736
commit
fca9efcbc4
@ -21,9 +21,22 @@ include "PTXInstrFormats.td"
|
||||
// Code Generation Predicates
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Addressing
// Two complementary predicates over the subtarget's use64BitAddresses()
// query: the 32-bit predicate is the logical negation of the 64-bit one.
|
||||
def Use32BitAddresses : Predicate<"!getSubtarget().use64BitAddresses()">;
|
||||
def Use64BitAddresses : Predicate<"getSubtarget().use64BitAddresses()">;
|
||||
|
||||
// Shader Model Support
// Each Supports* predicate forwards to the like-named subtarget query, and
// each DoesNotSupport* predicate is its negation, so a pattern can be gated
// on either the presence or the absence of a shader-model level.
|
||||
def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">;
|
||||
def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">;
|
||||
def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">;
|
||||
def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">;
|
||||
|
||||
// PTX Version Support
// Each Supports* predicate forwards to the like-named subtarget query, and
// each DoesNotSupport* predicate is its negation.
|
||||
def SupportsPTX20 : Predicate<"getSubtarget().supportsPTX20()">;
|
||||
def DoesNotSupportPTX20 : Predicate<"!getSubtarget().supportsPTX20()">;
|
||||
def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">;
|
||||
def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction Pattern Stuff
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -165,8 +178,8 @@ def PTXret
|
||||
// Instruction Class Templates
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Three-operand floating-point instruction template
|
||||
multiclass FLOAT3<string opcstr, SDNode opnode> {
|
||||
//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
|
||||
multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> {
|
||||
def rr32 : InstPTX<(outs RRegf32:$d),
|
||||
(ins RRegf32:$a, RRegf32:$b),
|
||||
!strconcat(opcstr, ".f32\t$d, $a, $b"),
|
||||
@ -185,6 +198,34 @@ multiclass FLOAT3<string opcstr, SDNode opnode> {
|
||||
[(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>;
|
||||
}
|
||||
|
||||
//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
// Template for instructions with one destination ($d) and three sources
// ($a, $b, $c) that are selected from a two-node DAG of the shape
//   (opnode2 (opnode1 $a, $b), $c)
//
//   opcstr  - mnemonic prefix; ".f32" or ".f64" and the operand list are
//             appended to it with !strconcat.
//   opnode1 - inner node, applied to $a and $b.
//   opnode2 - outer node, applied to the inner result and $c.
//
// Four records are produced: register and immediate forms of the third
// source operand, each for f32 and f64.
|
||||
multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> {
|
||||
// f32, all three sources in registers.
def rrr32 : InstPTX<(outs RRegf32:$d),
|
||||
(ins RRegf32:$a, RRegf32:$b, RRegf32:$c),
|
||||
!strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
|
||||
[(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
|
||||
RRegf32:$b),
|
||||
RRegf32:$c))]>;
|
||||
// f32, third source is a floating-point immediate.
def rri32 : InstPTX<(outs RRegf32:$d),
|
||||
(ins RRegf32:$a, RRegf32:$b, f32imm:$c),
|
||||
!strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
|
||||
[(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
|
||||
RRegf32:$b),
|
||||
fpimm:$c))]>;
|
||||
// f64, all three sources in registers.
def rrr64 : InstPTX<(outs RRegf64:$d),
|
||||
(ins RRegf64:$a, RRegf64:$b, RRegf64:$c),
|
||||
!strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
|
||||
[(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
|
||||
RRegf64:$b),
|
||||
RRegf64:$c))]>;
|
||||
// f64, third source is a floating-point immediate.
def rri64 : InstPTX<(outs RRegf64:$d),
|
||||
(ins RRegf64:$a, RRegf64:$b, f64imm:$c),
|
||||
!strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
|
||||
[(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
|
||||
RRegf64:$b),
|
||||
fpimm:$c))]>;
|
||||
}
|
||||
|
||||
multiclass INT3<string opcstr, SDNode opnode> {
|
||||
def rr16 : InstPTX<(outs RRegu16:$d),
|
||||
(ins RRegu16:$a, RRegu16:$b),
|
||||
@ -304,9 +345,59 @@ multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
|
||||
|
||||
///===- Floating-Point Arithmetic Instructions ----------------------------===//
|
||||
|
||||
// NOTE(review): the three FLOAT3 instantiations directly below and the three
// PTX_FLOAT_3OP instantiations after them define the same record prefixes
// (FADD/FSUB/FMUL). In this diff rendering the FLOAT3 rows look like the
// removed side of the hunk and the PTX_FLOAT_3OP rows the added side --
// confirm against the actual file before relying on either set.
defm FADD : FLOAT3<"add", fadd>;
|
||||
defm FSUB : FLOAT3<"sub", fsub>;
|
||||
defm FMUL : FLOAT3<"mul", fmul>;
|
||||
// Standard Binary Operations
// Each defm pairs a PTX mnemonic with the SelectionDAG node it is matched
// from: fadd -> "add", fsub -> "sub", fmul -> "mul".
|
||||
defm FADD : PTX_FLOAT_3OP<"add", fadd>;
|
||||
defm FSUB : PTX_FLOAT_3OP<"sub", fsub>;
|
||||
defm FMUL : PTX_FLOAT_3OP<"mul", fmul>;
|
||||
|
||||
// TODO: Allow user selection of rounding modes for fdiv.
|
||||
// Division has to be handled differently for f32 and f64.
|
||||
// For f32, we always use .approx since it is supported on all hardware
|
||||
// for PTX 1.4+, which is our minimum target.
|
||||
// f32 divide, both sources in registers; no subtarget predicate is attached.
def FDIVrr32 : InstPTX<(outs RRegf32:$d),
|
||||
(ins RRegf32:$a, RRegf32:$b),
|
||||
"div.approx.f32\t$d, $a, $b",
|
||||
[(set RRegf32:$d, (fdiv RRegf32:$a, RRegf32:$b))]>;
|
||||
// f32 divide with a floating-point immediate divisor.
def FDIVri32 : InstPTX<(outs RRegf32:$d),
|
||||
(ins RRegf32:$a, f32imm:$b),
|
||||
"div.approx.f32\t$d, $a, $b",
|
||||
[(set RRegf32:$d, (fdiv RRegf32:$a, fpimm:$b))]>;
|
||||
|
||||
// For f64, we must specify a rounding mode for sm 1.3+ but *not* for sm 1.0.
// The two records below are the sm 1.3+ forms: they emit "div.rn.f64" and
// are only selectable when the SupportsSM13 predicate holds.
|
||||
// f64 divide, both sources in registers.
def FDIVrr64SM13 : InstPTX<(outs RRegf64:$d),
|
||||
(ins RRegf64:$a, RRegf64:$b),
|
||||
"div.rn.f64\t$d, $a, $b",
|
||||
[(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
|
||||
Requires<[SupportsSM13]>;
|
||||
// f64 divide with a floating-point immediate divisor.
def FDIVri64SM13 : InstPTX<(outs RRegf64:$d),
|
||||
(ins RRegf64:$a, f64imm:$b),
|
||||
"div.rn.f64\t$d, $a, $b",
|
||||
[(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
|
||||
Requires<[SupportsSM13]>;
|
||||
// f64 divide forms for subtargets without SM 1.3 support: they emit
// "div.f64" with no rounding modifier and are only selectable when the
// DoesNotSupportSM13 predicate holds.
// Both sources in registers.
def FDIVrr64SM10 : InstPTX<(outs RRegf64:$d),
|
||||
(ins RRegf64:$a, RRegf64:$b),
|
||||
"div.f64\t$d, $a, $b",
|
||||
[(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
|
||||
Requires<[DoesNotSupportSM13]>;
|
||||
// Floating-point immediate divisor.
def FDIVri64SM10 : InstPTX<(outs RRegf64:$d),
|
||||
(ins RRegf64:$a, f64imm:$b),
|
||||
"div.f64\t$d, $a, $b",
|
||||
[(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
|
||||
Requires<[DoesNotSupportSM13]>;
|
||||
|
||||
|
||||
|
||||
// Multi-operation hybrid instructions
|
||||
|
||||
// Choosing between mad and fma is tricky. In some cases they are the *same*
|
||||
// instruction, but in other cases we may prefer one over the other. Also,
|
||||
// different PTX versions differ on whether rounding mode flags are required.
|
||||
// In the short term, mad is supported on all PTX versions and we use a
|
||||
// default rounding mode regardless of shader model or PTX version.
|
||||
// TODO: Allow the rounding mode to be selectable through llc.
// FMAD matches an fmul whose result feeds an fadd and emits "mad.rn" with
// the type suffix; no subtarget predicate is attached to these records.
// NOTE(review): the PTX ISA appears to restrict rounding modifiers on
// mad.f32 to sm_20+ (and on mad.f64 to sm_13+) -- confirm whether
// "mad.rn.f32" is accepted for sm_1x targets.
|
||||
defm FMAD : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>;
|
||||
|
||||
|
||||
|
||||
///===- Integer Arithmetic Instructions -----------------------------------===//
|
||||
|
||||
|
@ -54,6 +54,14 @@ namespace llvm {
|
||||
|
||||
/// Returns the value of the Use64BitAddresses member.
bool use64BitAddresses() const { return Use64BitAddresses; }
|
||||
|
||||
/// Returns true when PTXShaderModel compares greater than or equal to
/// PTX_SM_1_3.
bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
|
||||
|
||||
/// Returns true when PTXShaderModel compares greater than or equal to
/// PTX_SM_2_0.
bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
|
||||
|
||||
/// Returns true when PTXVersion compares greater than or equal to
/// PTX_VERSION_2_0.
bool supportsPTX20() const { return PTXVersion >= PTX_VERSION_2_0; }
|
||||
|
||||
/// Returns true when PTXVersion compares greater than or equal to
/// PTX_VERSION_2_1.
bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
|
||||
|
||||
std::string ParseSubtargetFeatures(const std::string &FS,
|
||||
const std::string &CPU);
|
||||
}; // class PTXSubtarget
|
||||
|
15
test/CodeGen/PTX/fdiv-sm10.ll
Normal file
15
test/CodeGen/PTX/fdiv-sm10.ll
Normal file
@ -0,0 +1,15 @@
|
||||
; RUN: llc < %s -march=ptx -mattr=+sm10 | FileCheck %s
|
||||
|
||||
; Single-precision divide: the expected output is the approximate form,
; div.approx.f32, immediately followed by ret.
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: div.approx.f32 f0, f1, f2;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fdiv float %x, %y
|
||||
ret float %a
|
||||
}
|
||||
|
||||
; Double-precision divide: the expected output is div.f64 with no rounding
; modifier, immediately followed by ret.
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: div.f64 fd0, fd1, fd2;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fdiv double %x, %y
|
||||
ret double %a
|
||||
}
|
15
test/CodeGen/PTX/fdiv-sm13.ll
Normal file
15
test/CodeGen/PTX/fdiv-sm13.ll
Normal file
@ -0,0 +1,15 @@
|
||||
; RUN: llc < %s -march=ptx -mattr=+sm13 | FileCheck %s
|
||||
|
||||
; Single-precision divide: the expected output is the approximate form,
; div.approx.f32, immediately followed by ret.
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: div.approx.f32 f0, f1, f2;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fdiv float %x, %y
|
||||
ret float %a
|
||||
}
|
||||
|
||||
; Double-precision divide: the expected output carries an explicit rounding
; modifier, div.rn.f64, immediately followed by ret.
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: div.rn.f64 fd0, fd1, fd2;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fdiv double %x, %y
|
||||
ret double %a
|
||||
}
|
17
test/CodeGen/PTX/mad.ll
Normal file
17
test/CodeGen/PTX/mad.ll
Normal file
@ -0,0 +1,17 @@
|
||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||
|
||||
; Single-precision multiply feeding an add: the pair is expected to be
; emitted as one mad.rn.f32, immediately followed by ret.
define ptx_device float @t1_f32(float %x, float %y, float %z) {
|
||||
; CHECK: mad.rn.f32 f0, f1, f2, f3;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fmul float %x, %y
|
||||
%b = fadd float %a, %z
|
||||
ret float %b
|
||||
}
|
||||
|
||||
; Double-precision multiply feeding an add: the pair is expected to be
; emitted as one mad.rn.f64, immediately followed by ret.
define ptx_device double @t1_f64(double %x, double %y, double %z) {
|
||||
; CHECK: mad.rn.f64 fd0, fd1, fd2, fd3;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fmul double %x, %y
|
||||
%b = fadd double %a, %z
|
||||
ret double %b
|
||||
}
|
Loading…
Reference in New Issue
Block a user