Add preliminary support for .f32 in the PTX backend.

- Add appropriate TableGen patterns for fadd, fsub, fmul.
- Add .f32 as the PTX type for the LLVM float type.
- Allow parameters, return values, and global variable declarations
  to accept the float type.
- Add appropriate test cases.

Patch by Justin Holewinski



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126636 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Che-Liang Chiou 2011-02-28 06:34:09 +00:00
parent d8d1584c13
commit f71720231f
11 changed files with 360 additions and 10 deletions

View File

@ -84,6 +84,7 @@ static const char PARAM_PREFIX[] = "__param_";
static const char *getRegisterTypeName(unsigned RegNo) {
#define TEST_REGCLS(cls, clsstr) \
if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
TEST_REGCLS(RRegf32, f32);
TEST_REGCLS(RRegs32, s32);
TEST_REGCLS(Preds, pred);
#undef TEST_REGCLS
@ -115,6 +116,21 @@ static const char *getStateSpaceName(unsigned addressSpace) {
return NULL;
}
static const char *getTypeName(const Type* type) {
while (true) {
switch (type->getTypeID()) {
default: llvm_unreachable("Unknown type");
case Type::FloatTyID: return ".f32";
case Type::IntegerTyID: return ".s32"; // TODO: Handle 64-bit types.
case Type::ArrayTyID:
case Type::PointerTyID:
type = dyn_cast<const SequentialType>(type)->getElementType();
break;
}
}
return NULL;
}
bool PTXAsmPrinter::doFinalization(Module &M) {
// XXX Temporarily remove global variables so that doFinalization() will not
// emit them again (global variables are emitted at beginning).
@ -218,6 +234,15 @@ void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
case MachineOperand::MO_Register:
OS << getRegisterName(MO.getReg());
break;
case MachineOperand::MO_FPImmediate:
APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
if (constFP.getZExtValue() > 0) {
OS << "0F" << constFP.toString(16, false);
}
else {
OS << "0F00000000";
}
break;
}
}
@ -265,8 +290,8 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
decl += " ";
}
// TODO: add types
decl += ".s32 ";
decl += getTypeName(gv->getType());
decl += " ";
decl += gvsym->getName();

View File

@ -28,9 +28,12 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
// Set up the register classes.
addRegisterClass(MVT::i1, PTX::PredsRegisterClass);
addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass);
addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass);
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
// Customize translation of memory addresses
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
@ -87,7 +90,8 @@ struct argmap_entry {
bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
} argmap[] = {
argmap_entry(MVT::i1, PTX::PredsRegisterClass),
argmap_entry(MVT::i32, PTX::RRegs32RegisterClass)
argmap_entry(MVT::i32, PTX::RRegs32RegisterClass),
argmap_entry(MVT::f32, PTX::RRegf32RegisterClass)
};
} // end anonymous namespace
@ -185,10 +189,18 @@ SDValue PTXTargetLowering::
if (Outs.size() == 0)
return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
assert(Outs[0].VT == MVT::i32 && "Can return only basic types");
SDValue Flag;
unsigned reg = PTX::R0;
unsigned reg;
if (Outs[0].VT == MVT::i32) {
reg = PTX::R0;
}
else if (Outs[0].VT == MVT::f32) {
reg = PTX::F0;
}
else {
assert(false && "Can return only basic types");
}
MachineFunction &MF = DAG.getMachineFunction();
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();

View File

@ -28,6 +28,7 @@ static const struct map_entry {
const int opcode;
} map[] = {
{ &PTX::RRegs32RegClass, PTX::MOVrr },
{ &PTX::RRegf32RegClass, PTX::MOVrr },
{ &PTX::PredsRegClass, PTX::MOVpp }
};
@ -35,12 +36,13 @@ void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DstReg, unsigned SrcReg,
bool KillSrc) const {
for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
if (PTX::RRegs32RegClass.contains(DstReg, SrcReg)) {
for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) {
if (map[i].cls->contains(DstReg, SrcReg)) {
BuildMI(MBB, I, DL,
get(PTX::MOVrr), DstReg).addReg(SrcReg, getKillRegState(KillSrc));
get(map[i].opcode), DstReg).addReg(SrcReg, getKillRegState(KillSrc));
return;
}
}
llvm_unreachable("Impossible reg-to-reg copy");
}

View File

@ -143,6 +143,18 @@ def PTXret
// Instruction Class Templates
//===----------------------------------------------------------------------===//
// Three-operand f32 instruction template.
// Instantiating this multiclass with a mnemonic (opcstr) and a DAG node
// (opnode) defines two instructions that write an RRegf32 destination $d:
//   rr - both sources $a and $b are RRegf32 registers
//   ri - $a is an RRegf32 register and $b is an f32 immediate
// NOTE(review): ".%type" in the asm string is presumably replaced with the
// register's type name when printing (the tests expect e.g. "add.f32") -
// confirm against the asm printer.
multiclass FLOAT3<string opcstr, SDNode opnode> {
// Register-register form.
def rr : InstPTX<(outs RRegf32:$d),
(ins RRegf32:$a, RRegf32:$b),
!strconcat(opcstr, ".%type\t$d, $a, $b"),
[(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
// Register-immediate form; the pattern matches a floating-point constant
// (fpimm) as the second operand.
def ri : InstPTX<(outs RRegf32:$d),
(ins RRegf32:$a, f32imm:$b),
!strconcat(opcstr, ".%type\t$d, $a, $b"),
[(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
}
multiclass INT3<string opcstr, SDNode opnode> {
def rr : InstPTX<(outs RRegs32:$d),
(ins RRegs32:$a, RRegs32:$b),
@ -204,6 +216,12 @@ multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
// Instructions
//===----------------------------------------------------------------------===//
///===- Floating-Point Arithmetic Instructions ----------------------------===//
// Each defm below instantiates the FLOAT3 multiclass, yielding an "rr" and an
// "ri" variant (e.g. FADDrr and FADDri) that match the given floating-point
// DAG node and print with the given mnemonic.
defm FADD : FLOAT3<"add", fadd>;
defm FSUB : FLOAT3<"sub", fsub>;
defm FMUL : FLOAT3<"mul", fmul>;
///===- Integer Arithmetic Instructions -----------------------------------===//
defm ADD : INT3<"add", add>;
@ -223,6 +241,8 @@ let neverHasSideEffects = 1 in {
: InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
def MOVrr
: InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>;
// Register-to-register move between RRegf32 registers. It has an empty
// selection pattern and a fixed "mov.f32" mnemonic.
// NOTE(review): the register-class-to-opcode table used for physical register
// copies pairs RRegf32 with MOVrr rather than with this instruction - confirm
// which opcode is intended for f32 copies.
def FMOVrr
: InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
@ -232,8 +252,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOVri
: InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a",
[(set RRegs32:$d, imm:$a)]>;
// Move of an f32 immediate into an RRegf32 register; the pattern matches a
// floating-point constant (fpimm) being set into the destination.
def FMOVri
: InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
[(set RRegf32:$d, fpimm:$a)]>;
}
// Integer loads
defm LDg : PTX_LD<"ld.global", RRegs32, load_global>;
defm LDc : PTX_LD<"ld.const", RRegs32, load_constant>;
defm LDl : PTX_LD<"ld.local", RRegs32, load_local>;
@ -243,12 +267,30 @@ defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>;
def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a),
"ld.param.%type\t$d, [$a]", []>;
// Floating-point loads
// One PTX_LD instantiation per state space (global, const, local, param,
// shared), each loading into the RRegf32 register class. These parallel the
// integer load definitions, which use RRegs32.
defm FLDg : PTX_LD<"ld.global", RRegf32, load_global>;
defm FLDc : PTX_LD<"ld.const", RRegf32, load_constant>;
defm FLDl : PTX_LD<"ld.local", RRegf32, load_local>;
defm FLDp : PTX_LD<"ld.param", RRegf32, load_parameter>;
defm FLDs : PTX_LD<"ld.shared", RRegf32, load_shared>;
// Parameter load addressed through a MEMpi operand. The pattern list is
// empty, so no selection pattern is generated from this definition.
// NOTE(review): presumably selected or emitted by custom code - confirm.
def FLDpi : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
"ld.param.%type\t$d, [$a]", []>;
// Integer stores
defm STg : PTX_ST<"st.global", RRegs32, store_global>;
defm STl : PTX_ST<"st.local", RRegs32, store_local>;
// Store to parameter state space requires PTX 2.0 or higher?
// defm STp : PTX_ST<"st.param", RRegs32, store_parameter>;
defm STs : PTX_ST<"st.shared", RRegs32, store_shared>;
// Floating-point stores
// One PTX_ST instantiation per writable state space used here (global, local,
// shared), each storing from the RRegf32 register class. The param-space
// store is left commented out, matching the integer store definitions.
defm FSTg : PTX_ST<"st.global", RRegf32, store_global>;
defm FSTl : PTX_ST<"st.local", RRegf32, store_local>;
// Store to parameter state space requires PTX 2.0 or higher?
// defm FSTp : PTX_ST<"st.param", RRegf32, store_parameter>;
defm FSTs : PTX_ST<"st.shared", RRegf32, store_shared>;
///===- Control Flow Instructions -----------------------------------------===//
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {

View File

@ -85,6 +85,40 @@ def R29 : PTXReg<"r29">;
def R30 : PTXReg<"r30">;
def R31 : PTXReg<"r31">;
// Registers F0..F31, printed as "f0".."f31". These are the members of the
// RRegf32 register class.
def F0 : PTXReg<"f0">;
def F1 : PTXReg<"f1">;
def F2 : PTXReg<"f2">;
def F3 : PTXReg<"f3">;
def F4 : PTXReg<"f4">;
def F5 : PTXReg<"f5">;
def F6 : PTXReg<"f6">;
def F7 : PTXReg<"f7">;
def F8 : PTXReg<"f8">;
def F9 : PTXReg<"f9">;
def F10 : PTXReg<"f10">;
def F11 : PTXReg<"f11">;
def F12 : PTXReg<"f12">;
def F13 : PTXReg<"f13">;
def F14 : PTXReg<"f14">;
def F15 : PTXReg<"f15">;
def F16 : PTXReg<"f16">;
def F17 : PTXReg<"f17">;
def F18 : PTXReg<"f18">;
def F19 : PTXReg<"f19">;
def F20 : PTXReg<"f20">;
def F21 : PTXReg<"f21">;
def F22 : PTXReg<"f22">;
def F23 : PTXReg<"f23">;
def F24 : PTXReg<"f24">;
def F25 : PTXReg<"f25">;
def F26 : PTXReg<"f26">;
def F27 : PTXReg<"f27">;
def F28 : PTXReg<"f28">;
def F29 : PTXReg<"f29">;
def F30 : PTXReg<"f30">;
def F31 : PTXReg<"f31">;
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
@ -100,3 +134,9 @@ def RRegs32 : RegisterClass<"PTX", [i32], 32,
R8, R9, R10, R11, R12, R13, R14, R15,
R16, R17, R18, R19, R20, R21, R22, R23,
R24, R25, R26, R27, R28, R29, R30, R31]>;
// Register class for f32 values, made up of F0..F31. It parallels RRegs32,
// which holds i32 values in R0..R31.
def RRegf32 : RegisterClass<"PTX", [f32], 32,
[F0, F1, F2, F3, F4, F5, F6, F7,
F8, F9, F10, F11, F12, F13, F14, F15,
F16, F17, F18, F19, F20, F21, F22, F23,
F24, F25, F26, F27, F28, F29, F30, F31]>;

View File

@ -13,3 +13,17 @@ define ptx_device i32 @t2(i32 %x) {
; CHECK: ret;
ret i32 %z
}
; Register-register fadd. The pattern ends with ';' like the other checks in
; this file so that the last operand cannot match a longer name (f20..f29).
define ptx_device float @t3(float %x, float %y) {
; CHECK: add.f32 f0, f1, f2;
; CHECK-NEXT: ret;
%z = fadd float %x, %y
ret float %z
}
; Register-immediate fadd. The expected immediate 0F3F800000 is the "0F"
; prefix followed by the IEEE-754 single-precision bit pattern of 1.0.
define ptx_device float @t4(float %x) {
; CHECK: add.f32 f0, f1, 0F3F800000;
; CHECK-NEXT: ret;
%z = fadd float %x, 1.0
ret float %z
}

View File

@ -0,0 +1,86 @@
; RUN: llc < %s -march=ptx | FileCheck %s
; External float arrays, one per address space exercised by the loads below.
; Per the expected output, the default address space prints as .global,
; addrspace(1) as .const, addrspace(2) as .local and addrspace(4) as .shared,
; and the element type prints as .f32.
;CHECK: .extern .global .f32 array[];
@array = external global [10 x float]
;CHECK: .extern .const .f32 array_constant[];
@array_constant = external addrspace(1) constant [10 x float]
;CHECK: .extern .local .f32 array_local[];
@array_local = external addrspace(2) global [10 x float]
;CHECK: .extern .shared .f32 array_shared[];
@array_shared = external addrspace(4) global [10 x float]
; Load through a pointer argument with no offset: the address is expected to
; be the plain register r1 and the result to land in f0.
define ptx_device float @t1(float* %p) {
entry:
;CHECK: ld.global.f32 f0, [r1];
;CHECK-NEXT: ret;
%x = load float* %p
ret float %x
}
; Load of element 1 through a pointer argument: the constant 4-byte offset is
; expected to be folded into the address operand as [r1+4].
define ptx_device float @t2(float* %p) {
entry:
;CHECK: ld.global.f32 f0, [r1+4];
;CHECK-NEXT: ret;
%i = getelementptr float* %p, i32 1
%x = load float* %i
ret float %x
}
; Load with a variable index: the index is expected to be scaled by a left
; shift of 2 and added to the base pointer before the load.
define ptx_device float @t3(float* %p, i32 %q) {
entry:
;CHECK: shl.b32 r0, r2, 2;
;CHECK-NEXT: add.s32 r0, r1, r0;
;CHECK-NEXT: ld.global.f32 f0, [r0];
;CHECK-NEXT: ret;
%i = getelementptr float* %p, i32 %q
%x = load float* %i
ret float %x
}
; Load of element 0 of the default-address-space array: expects ld.global
; addressed directly by the symbol name.
define ptx_device float @t4_global() {
entry:
;CHECK: ld.global.f32 f0, [array];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float]* @array, i32 0, i32 0
%x = load float* %i
ret float %x
}
; Load of element 0 of the addrspace(1) array: expects ld.const addressed
; directly by the symbol name.
define ptx_device float @t4_const() {
entry:
;CHECK: ld.const.f32 f0, [array_constant];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(1)* @array_constant, i32 0, i32 0
%x = load float addrspace(1)* %i
ret float %x
}
; Load of element 0 of the addrspace(2) array: expects ld.local addressed
; directly by the symbol name.
define ptx_device float @t4_local() {
entry:
;CHECK: ld.local.f32 f0, [array_local];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(2)* @array_local, i32 0, i32 0
%x = load float addrspace(2)* %i
ret float %x
}
; Load of element 0 of the addrspace(4) array: expects ld.shared addressed
; directly by the symbol name.
define ptx_device float @t4_shared() {
entry:
;CHECK: ld.shared.f32 f0, [array_shared];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(4)* @array_shared, i32 0, i32 0
%x = load float addrspace(4)* %i
ret float %x
}
; Load of element 1 of the global array: the constant 4-byte offset is
; expected to be folded into the symbol address as [array+4].
define ptx_device float @t5() {
entry:
;CHECK: ld.global.f32 f0, [array+4];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float]* @array, i32 0, i32 1
%x = load float* %i
ret float %x
}

View File

@ -11,3 +11,15 @@ define ptx_device i32 @t2(i32 %x) {
; CHECK: ret;
ret i32 %x
}
; Returning the constant 0.0: expects an immediate move whose operand is the
; "0F" prefix followed by eight zero hex digits.
define ptx_device float @t3() {
; CHECK: mov.f32 f0, 0F00000000;
; CHECK-NEXT: ret;
ret float 0.0
}
; Returning the float argument unchanged: expects a register-to-register
; move from f1 into f0.
define ptx_device float @t4(float %x) {
; CHECK: mov.f32 f0, f1;
; CHECK-NEXT: ret;
ret float %x
}

25
test/CodeGen/PTX/mul.ll Normal file
View File

@ -0,0 +1,25 @@
; RUN: llc < %s -march=ptx | FileCheck %s
;define ptx_device i32 @t1(i32 %x, i32 %y) {
; %z = mul i32 %x, %y
; ret i32 %z
;}
;define ptx_device i32 @t2(i32 %x) {
; %z = mul i32 %x, 1
; ret i32 %z
;}
; Register-register fmul. The pattern ends with ';' like the immediate-form
; check below so that the last operand cannot match a longer name (f20..f29).
define ptx_device float @t3(float %x, float %y) {
; CHECK: mul.f32 f0, f1, f2;
; CHECK-NEXT: ret;
%z = fmul float %x, %y
ret float %z
}
; Register-immediate fmul. The expected immediate 0F40A00000 is the "0F"
; prefix followed by the IEEE-754 single-precision bit pattern of 5.0.
define ptx_device float @t4(float %x) {
; CHECK: mul.f32 f0, f1, 0F40A00000;
; CHECK-NEXT: ret;
%z = fmul float %x, 5.0
ret float %z
}

View File

@ -0,0 +1,78 @@
; RUN: llc < %s -march=ptx | FileCheck %s
; External float arrays used as store targets below. Per the expected output,
; the default address space prints as .global, addrspace(1) as .const,
; addrspace(2) as .local and addrspace(4) as .shared, with element type .f32.
; array_constant is only declared; none of the functions shown stores to it.
;CHECK: .extern .global .f32 array[];
@array = external global [10 x float]
;CHECK: .extern .const .f32 array_constant[];
@array_constant = external addrspace(1) constant [10 x float]
;CHECK: .extern .local .f32 array_local[];
@array_local = external addrspace(2) global [10 x float]
;CHECK: .extern .shared .f32 array_shared[];
@array_shared = external addrspace(4) global [10 x float]
; Store through a pointer argument with no offset: the address is expected to
; be the plain register r1 and the stored value to come from f1.
define ptx_device void @t1(float* %p, float %x) {
entry:
;CHECK: st.global.f32 [r1], f1;
;CHECK-NEXT: ret;
store float %x, float* %p
ret void
}
; Store to element 1 through a pointer argument: the constant 4-byte offset
; is expected to be folded into the address operand as [r1+4].
define ptx_device void @t2(float* %p, float %x) {
entry:
;CHECK: st.global.f32 [r1+4], f1;
;CHECK-NEXT: ret;
%i = getelementptr float* %p, i32 1
store float %x, float* %i
ret void
}
; Store with a variable index: expects a .reg declaration for the temporary
; r0, then the index scaled by a left shift of 2 and added to the base
; pointer before the store.
define ptx_device void @t3(float* %p, i32 %q, float %x) {
;CHECK: .reg .s32 r0;
entry:
;CHECK: shl.b32 r0, r2, 2;
;CHECK-NEXT: add.s32 r0, r1, r0;
;CHECK-NEXT: st.global.f32 [r0], f1;
;CHECK-NEXT: ret;
%i = getelementptr float* %p, i32 %q
store float %x, float* %i
ret void
}
; Store to element 0 of the default-address-space array: expects st.global
; addressed directly by the symbol name.
define ptx_device void @t4_global(float %x) {
entry:
;CHECK: st.global.f32 [array], f1;
;CHECK-NEXT: ret;
%i = getelementptr [10 x float]* @array, i32 0, i32 0
store float %x, float* %i
ret void
}
; Store to element 0 of the addrspace(2) array: expects st.local addressed
; directly by the symbol name.
define ptx_device void @t4_local(float %x) {
entry:
;CHECK: st.local.f32 [array_local], f1;
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(2)* @array_local, i32 0, i32 0
store float %x, float addrspace(2)* %i
ret void
}
; Store to element 0 of the addrspace(4) array: expects st.shared addressed
; directly by the symbol name.
define ptx_device void @t4_shared(float %x) {
entry:
;CHECK: st.shared.f32 [array_shared], f1;
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(4)* @array_shared, i32 0, i32 0
store float %x, float addrspace(4)* %i
ret void
}
; Store to element 1 of the global array: the constant 4-byte offset is
; expected to be folded into the symbol address as [array+4].
define ptx_device void @t5(float %x) {
entry:
;CHECK: st.global.f32 [array+4], f1;
;CHECK-NEXT: ret;
%i = getelementptr [10 x float]* @array, i32 0, i32 1
store float %x, float* %i
ret void
}

View File

@ -13,3 +13,17 @@ define ptx_device i32 @t2(i32 %x) {
;CHECK: ret;
ret i32 %z
}
; Register-register fsub. The pattern ends with ';' like the other checks in
; this file so that the last operand cannot match a longer name (f20..f29).
define ptx_device float @t3(float %x, float %y) {
; CHECK: sub.f32 f0, f1, f2;
; CHECK-NEXT: ret;
%z = fsub float %x, %y
ret float %z
}
; fsub of the constant 1.0. The expected output is an add, not a sub: the
; immediate 0FBF800000 is the IEEE-754 single-precision bit pattern of -1.0,
; i.e. the subtraction is expected to be emitted as an addition of the
; negated constant.
define ptx_device float @t4(float %x) {
; CHECK: add.f32 f0, f1, 0FBF800000;
; CHECK-NEXT: ret;
%z = fsub float %x, 1.0
ret float %z
}