mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-12 02:33:33 +00:00
Add preliminary support for .f32 in the PTX backend.
- Add appropriate TableGen patterns for fadd, fsub, fmul. - Add .f32 as the PTX type for the LLVM float type. - Allow parameters, return values, and global variable declarations to accept the float type. - Add appropriate test cases. Patch by Justin Holewinski git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126636 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d8d1584c13
commit
f71720231f
@ -84,6 +84,7 @@ static const char PARAM_PREFIX[] = "__param_";
|
||||
static const char *getRegisterTypeName(unsigned RegNo) {
|
||||
#define TEST_REGCLS(cls, clsstr) \
|
||||
if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
|
||||
TEST_REGCLS(RRegf32, f32);
|
||||
TEST_REGCLS(RRegs32, s32);
|
||||
TEST_REGCLS(Preds, pred);
|
||||
#undef TEST_REGCLS
|
||||
@ -115,6 +116,21 @@ static const char *getStateSpaceName(unsigned addressSpace) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static const char *getTypeName(const Type* type) {
|
||||
while (true) {
|
||||
switch (type->getTypeID()) {
|
||||
default: llvm_unreachable("Unknown type");
|
||||
case Type::FloatTyID: return ".f32";
|
||||
case Type::IntegerTyID: return ".s32"; // TODO: Handle 64-bit types.
|
||||
case Type::ArrayTyID:
|
||||
case Type::PointerTyID:
|
||||
type = dyn_cast<const SequentialType>(type)->getElementType();
|
||||
break;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool PTXAsmPrinter::doFinalization(Module &M) {
|
||||
// XXX Temporarily remove global variables so that doFinalization() will not
|
||||
// emit them again (global variables are emitted at beginning).
|
||||
@ -218,6 +234,15 @@ void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
|
||||
case MachineOperand::MO_Register:
|
||||
OS << getRegisterName(MO.getReg());
|
||||
break;
|
||||
case MachineOperand::MO_FPImmediate:
|
||||
APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
|
||||
if (constFP.getZExtValue() > 0) {
|
||||
OS << "0F" << constFP.toString(16, false);
|
||||
}
|
||||
else {
|
||||
OS << "0F00000000";
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -265,8 +290,8 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
|
||||
decl += " ";
|
||||
}
|
||||
|
||||
// TODO: add types
|
||||
decl += ".s32 ";
|
||||
decl += getTypeName(gv->getType());
|
||||
decl += " ";
|
||||
|
||||
decl += gvsym->getName();
|
||||
|
||||
|
@ -28,9 +28,12 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
|
||||
// Set up the register classes.
|
||||
addRegisterClass(MVT::i1, PTX::PredsRegisterClass);
|
||||
addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass);
|
||||
addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass);
|
||||
|
||||
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
|
||||
|
||||
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
|
||||
|
||||
// Customize translation of memory addresses
|
||||
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
|
||||
|
||||
@ -87,7 +90,8 @@ struct argmap_entry {
|
||||
bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
|
||||
} argmap[] = {
|
||||
argmap_entry(MVT::i1, PTX::PredsRegisterClass),
|
||||
argmap_entry(MVT::i32, PTX::RRegs32RegisterClass)
|
||||
argmap_entry(MVT::i32, PTX::RRegs32RegisterClass),
|
||||
argmap_entry(MVT::f32, PTX::RRegf32RegisterClass)
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
@ -185,10 +189,18 @@ SDValue PTXTargetLowering::
|
||||
if (Outs.size() == 0)
|
||||
return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
|
||||
|
||||
assert(Outs[0].VT == MVT::i32 && "Can return only basic types");
|
||||
|
||||
SDValue Flag;
|
||||
unsigned reg = PTX::R0;
|
||||
unsigned reg;
|
||||
|
||||
if (Outs[0].VT == MVT::i32) {
|
||||
reg = PTX::R0;
|
||||
}
|
||||
else if (Outs[0].VT == MVT::f32) {
|
||||
reg = PTX::F0;
|
||||
}
|
||||
else {
|
||||
assert(false && "Can return only basic types");
|
||||
}
|
||||
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
|
||||
|
@ -28,6 +28,7 @@ static const struct map_entry {
|
||||
const int opcode;
|
||||
} map[] = {
|
||||
{ &PTX::RRegs32RegClass, PTX::MOVrr },
|
||||
{ &PTX::RRegf32RegClass, PTX::MOVrr },
|
||||
{ &PTX::PredsRegClass, PTX::MOVpp }
|
||||
};
|
||||
|
||||
@ -35,12 +36,13 @@ void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I, DebugLoc DL,
|
||||
unsigned DstReg, unsigned SrcReg,
|
||||
bool KillSrc) const {
|
||||
for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
|
||||
if (PTX::RRegs32RegClass.contains(DstReg, SrcReg)) {
|
||||
for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) {
|
||||
if (map[i].cls->contains(DstReg, SrcReg)) {
|
||||
BuildMI(MBB, I, DL,
|
||||
get(PTX::MOVrr), DstReg).addReg(SrcReg, getKillRegState(KillSrc));
|
||||
get(map[i].opcode), DstReg).addReg(SrcReg, getKillRegState(KillSrc));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
llvm_unreachable("Impossible reg-to-reg copy");
|
||||
}
|
||||
|
@ -143,6 +143,18 @@ def PTXret
|
||||
// Instruction Class Templates
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Three-operand f32 instruction template
|
||||
multiclass FLOAT3<string opcstr, SDNode opnode> {
|
||||
def rr : InstPTX<(outs RRegf32:$d),
|
||||
(ins RRegf32:$a, RRegf32:$b),
|
||||
!strconcat(opcstr, ".%type\t$d, $a, $b"),
|
||||
[(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
|
||||
def ri : InstPTX<(outs RRegf32:$d),
|
||||
(ins RRegf32:$a, f32imm:$b),
|
||||
!strconcat(opcstr, ".%type\t$d, $a, $b"),
|
||||
[(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
|
||||
}
|
||||
|
||||
multiclass INT3<string opcstr, SDNode opnode> {
|
||||
def rr : InstPTX<(outs RRegs32:$d),
|
||||
(ins RRegs32:$a, RRegs32:$b),
|
||||
@ -204,6 +216,12 @@ multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
|
||||
// Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
///===- Floating-Point Arithmetic Instructions ----------------------------===//
|
||||
|
||||
defm FADD : FLOAT3<"add", fadd>;
|
||||
defm FSUB : FLOAT3<"sub", fsub>;
|
||||
defm FMUL : FLOAT3<"mul", fmul>;
|
||||
|
||||
///===- Integer Arithmetic Instructions -----------------------------------===//
|
||||
|
||||
defm ADD : INT3<"add", add>;
|
||||
@ -223,6 +241,8 @@ let neverHasSideEffects = 1 in {
|
||||
: InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
|
||||
def MOVrr
|
||||
: InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>;
|
||||
def FMOVrr
|
||||
: InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>;
|
||||
}
|
||||
|
||||
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
|
||||
@ -232,8 +252,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
|
||||
def MOVri
|
||||
: InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a",
|
||||
[(set RRegs32:$d, imm:$a)]>;
|
||||
def FMOVri
|
||||
: InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
|
||||
[(set RRegf32:$d, fpimm:$a)]>;
|
||||
}
|
||||
|
||||
// Integer loads
|
||||
defm LDg : PTX_LD<"ld.global", RRegs32, load_global>;
|
||||
defm LDc : PTX_LD<"ld.const", RRegs32, load_constant>;
|
||||
defm LDl : PTX_LD<"ld.local", RRegs32, load_local>;
|
||||
@ -243,12 +267,30 @@ defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>;
|
||||
def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a),
|
||||
"ld.param.%type\t$d, [$a]", []>;
|
||||
|
||||
// Floating-point loads
|
||||
defm FLDg : PTX_LD<"ld.global", RRegf32, load_global>;
|
||||
defm FLDc : PTX_LD<"ld.const", RRegf32, load_constant>;
|
||||
defm FLDl : PTX_LD<"ld.local", RRegf32, load_local>;
|
||||
defm FLDp : PTX_LD<"ld.param", RRegf32, load_parameter>;
|
||||
defm FLDs : PTX_LD<"ld.shared", RRegf32, load_shared>;
|
||||
|
||||
def FLDpi : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
|
||||
"ld.param.%type\t$d, [$a]", []>;
|
||||
|
||||
// Integer stores
|
||||
defm STg : PTX_ST<"st.global", RRegs32, store_global>;
|
||||
defm STl : PTX_ST<"st.local", RRegs32, store_local>;
|
||||
// Store to parameter state space requires PTX 2.0 or higher?
|
||||
// defm STp : PTX_ST<"st.param", RRegs32, store_parameter>;
|
||||
defm STs : PTX_ST<"st.shared", RRegs32, store_shared>;
|
||||
|
||||
// Floating-point stores
|
||||
defm FSTg : PTX_ST<"st.global", RRegf32, store_global>;
|
||||
defm FSTl : PTX_ST<"st.local", RRegf32, store_local>;
|
||||
// Store to parameter state space requires PTX 2.0 or higher?
|
||||
// defm FSTp : PTX_ST<"st.param", RRegf32, store_parameter>;
|
||||
defm FSTs : PTX_ST<"st.shared", RRegf32, store_shared>;
|
||||
|
||||
///===- Control Flow Instructions -----------------------------------------===//
|
||||
|
||||
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
|
||||
|
@ -85,6 +85,40 @@ def R29 : PTXReg<"r29">;
|
||||
def R30 : PTXReg<"r30">;
|
||||
def R31 : PTXReg<"r31">;
|
||||
|
||||
def F0 : PTXReg<"f0">;
|
||||
def F1 : PTXReg<"f1">;
|
||||
def F2 : PTXReg<"f2">;
|
||||
def F3 : PTXReg<"f3">;
|
||||
def F4 : PTXReg<"f4">;
|
||||
def F5 : PTXReg<"f5">;
|
||||
def F6 : PTXReg<"f6">;
|
||||
def F7 : PTXReg<"f7">;
|
||||
def F8 : PTXReg<"f8">;
|
||||
def F9 : PTXReg<"f9">;
|
||||
def F10 : PTXReg<"f10">;
|
||||
def F11 : PTXReg<"f11">;
|
||||
def F12 : PTXReg<"f12">;
|
||||
def F13 : PTXReg<"f13">;
|
||||
def F14 : PTXReg<"f14">;
|
||||
def F15 : PTXReg<"f15">;
|
||||
def F16 : PTXReg<"f16">;
|
||||
def F17 : PTXReg<"f17">;
|
||||
def F18 : PTXReg<"f18">;
|
||||
def F19 : PTXReg<"f19">;
|
||||
def F20 : PTXReg<"f20">;
|
||||
def F21 : PTXReg<"f21">;
|
||||
def F22 : PTXReg<"f22">;
|
||||
def F23 : PTXReg<"f23">;
|
||||
def F24 : PTXReg<"f24">;
|
||||
def F25 : PTXReg<"f25">;
|
||||
def F26 : PTXReg<"f26">;
|
||||
def F27 : PTXReg<"f27">;
|
||||
def F28 : PTXReg<"f28">;
|
||||
def F29 : PTXReg<"f29">;
|
||||
def F30 : PTXReg<"f30">;
|
||||
def F31 : PTXReg<"f31">;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -100,3 +134,9 @@ def RRegs32 : RegisterClass<"PTX", [i32], 32,
|
||||
R8, R9, R10, R11, R12, R13, R14, R15,
|
||||
R16, R17, R18, R19, R20, R21, R22, R23,
|
||||
R24, R25, R26, R27, R28, R29, R30, R31]>;
|
||||
|
||||
def RRegf32 : RegisterClass<"PTX", [f32], 32,
|
||||
[F0, F1, F2, F3, F4, F5, F6, F7,
|
||||
F8, F9, F10, F11, F12, F13, F14, F15,
|
||||
F16, F17, F18, F19, F20, F21, F22, F23,
|
||||
F24, F25, F26, F27, F28, F29, F30, F31]>;
|
||||
|
@ -13,3 +13,17 @@ define ptx_device i32 @t2(i32 %x) {
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device float @t3(float %x, float %y) {
|
||||
; CHECK: add.f32 f0, f1, f2
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fadd float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device float @t4(float %x) {
|
||||
; CHECK: add.f32 f0, f1, 0F3F800000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fadd float %x, 1.0
|
||||
ret float %z
|
||||
}
|
||||
|
86
test/CodeGen/PTX/ld_float.ll
Normal file
86
test/CodeGen/PTX/ld_float.ll
Normal file
@ -0,0 +1,86 @@
|
||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||
|
||||
;CHECK: .extern .global .f32 array[];
|
||||
@array = external global [10 x float]
|
||||
|
||||
;CHECK: .extern .const .f32 array_constant[];
|
||||
@array_constant = external addrspace(1) constant [10 x float]
|
||||
|
||||
;CHECK: .extern .local .f32 array_local[];
|
||||
@array_local = external addrspace(2) global [10 x float]
|
||||
|
||||
;CHECK: .extern .shared .f32 array_shared[];
|
||||
@array_shared = external addrspace(4) global [10 x float]
|
||||
|
||||
define ptx_device float @t1(float* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 f0, [r1];
|
||||
;CHECK-NEXT: ret;
|
||||
%x = load float* %p
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device float @t2(float* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 f0, [r1+4];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr float* %p, i32 1
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device float @t3(float* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 2;
|
||||
;CHECK-NEXT: add.s32 r0, r1, r0;
|
||||
;CHECK-NEXT: ld.global.f32 f0, [r0];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr float* %p, i32 %q
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_global() {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 f0, [array];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float]* @array, i32 0, i32 0
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_const() {
|
||||
entry:
|
||||
;CHECK: ld.const.f32 f0, [array_constant];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(1)* @array_constant, i32 0, i32 0
|
||||
%x = load float addrspace(1)* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_local() {
|
||||
entry:
|
||||
;CHECK: ld.local.f32 f0, [array_local];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(2)* @array_local, i32 0, i32 0
|
||||
%x = load float addrspace(2)* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_shared() {
|
||||
entry:
|
||||
;CHECK: ld.shared.f32 f0, [array_shared];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(4)* @array_shared, i32 0, i32 0
|
||||
%x = load float addrspace(4)* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device float @t5() {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 f0, [array+4];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float]* @array, i32 0, i32 1
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
@ -11,3 +11,15 @@ define ptx_device i32 @t2(i32 %x) {
|
||||
; CHECK: ret;
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t3() {
|
||||
; CHECK: mov.f32 f0, 0F00000000;
|
||||
; CHECK-NEXT: ret;
|
||||
ret float 0.0
|
||||
}
|
||||
|
||||
define ptx_device float @t4(float %x) {
|
||||
; CHECK: mov.f32 f0, f1;
|
||||
; CHECK-NEXT: ret;
|
||||
ret float %x
|
||||
}
|
||||
|
25
test/CodeGen/PTX/mul.ll
Normal file
25
test/CodeGen/PTX/mul.ll
Normal file
@ -0,0 +1,25 @@
|
||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||
|
||||
;define ptx_device i32 @t1(i32 %x, i32 %y) {
|
||||
; %z = mul i32 %x, %y
|
||||
; ret i32 %z
|
||||
;}
|
||||
|
||||
;define ptx_device i32 @t2(i32 %x) {
|
||||
; %z = mul i32 %x, 1
|
||||
; ret i32 %z
|
||||
;}
|
||||
|
||||
define ptx_device float @t3(float %x, float %y) {
|
||||
; CHECK: mul.f32 f0, f1, f2
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fmul float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device float @t4(float %x) {
|
||||
; CHECK: mul.f32 f0, f1, 0F40A00000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fmul float %x, 5.0
|
||||
ret float %z
|
||||
}
|
78
test/CodeGen/PTX/st_float.ll
Normal file
78
test/CodeGen/PTX/st_float.ll
Normal file
@ -0,0 +1,78 @@
|
||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||
|
||||
;CHECK: .extern .global .f32 array[];
|
||||
@array = external global [10 x float]
|
||||
|
||||
;CHECK: .extern .const .f32 array_constant[];
|
||||
@array_constant = external addrspace(1) constant [10 x float]
|
||||
|
||||
;CHECK: .extern .local .f32 array_local[];
|
||||
@array_local = external addrspace(2) global [10 x float]
|
||||
|
||||
;CHECK: .extern .shared .f32 array_shared[];
|
||||
@array_shared = external addrspace(4) global [10 x float]
|
||||
|
||||
define ptx_device void @t1(float* %p, float %x) {
|
||||
entry:
|
||||
;CHECK: st.global.f32 [r1], f1;
|
||||
;CHECK-NEXT: ret;
|
||||
store float %x, float* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t2(float* %p, float %x) {
|
||||
entry:
|
||||
;CHECK: st.global.f32 [r1+4], f1;
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr float* %p, i32 1
|
||||
store float %x, float* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t3(float* %p, i32 %q, float %x) {
|
||||
;CHECK: .reg .s32 r0;
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 2;
|
||||
;CHECK-NEXT: add.s32 r0, r1, r0;
|
||||
;CHECK-NEXT: st.global.f32 [r0], f1;
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr float* %p, i32 %q
|
||||
store float %x, float* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_global(float %x) {
|
||||
entry:
|
||||
;CHECK: st.global.f32 [array], f1;
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float]* @array, i32 0, i32 0
|
||||
store float %x, float* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_local(float %x) {
|
||||
entry:
|
||||
;CHECK: st.local.f32 [array_local], f1;
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(2)* @array_local, i32 0, i32 0
|
||||
store float %x, float addrspace(2)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_shared(float %x) {
|
||||
entry:
|
||||
;CHECK: st.shared.f32 [array_shared], f1;
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(4)* @array_shared, i32 0, i32 0
|
||||
store float %x, float addrspace(4)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t5(float %x) {
|
||||
entry:
|
||||
;CHECK: st.global.f32 [array+4], f1;
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float]* @array, i32 0, i32 1
|
||||
store float %x, float* %i
|
||||
ret void
|
||||
}
|
@ -13,3 +13,17 @@ define ptx_device i32 @t2(i32 %x) {
|
||||
;CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device float @t3(float %x, float %y) {
|
||||
; CHECK: sub.f32 f0, f1, f2
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fsub float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device float @t4(float %x) {
|
||||
; CHECK: add.f32 f0, f1, 0FBF800000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fsub float %x, 1.0
|
||||
ret float %z
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user