PTX: add state spaces and per-state-space load instructions

Summary of what the hunks below do:

  * PTX.h: introduces the PTX::StateSpace enum (GLOBAL = 0, CONSTANT = 1,
    LOCAL = 2, PARAMETER = 3, SHARED = 4). The enum values are compared
    directly against LLVM address-space numbers elsewhere in this patch.

  * PTXAsmPrinter.cpp: getStateSpaceName() now switches on those values and
    returns "global", "const", "local", "param" or "shared". Previously it
    returned "global" for every address space <= 255. Any other value now
    reaches llvm_unreachable.

  * PTXInstrInfo.td: load_global is tightened from "address space <= 255"
    to "address space == PTX::GLOBAL", and load_constant, load_local,
    load_parameter and load_shared PatFrags are added with the same shape.
    LDc / LDl / LDp / LDs are instantiated from PTX_LD next to LDg.
    Behavior change: loads from address spaces 1..255 no longer match
    load_global.

  * test/CodeGen/PTX/ld.ll: adds external arrays in address spaces 1, 2
    and 4 with CHECK lines for .const/.local/.shared, renames @t4 to
    @t4_global, and adds @t4_const, @t4_local and @t4_shared.

NOTE(review): no ld.param test is added by the hunks shown here.
NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy; the blob hashes on the "index" lines are
carried over unverified. The explicit template arguments on cast<> and
dyn_cast<> were missing in that copy and have been restored.

diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h
index 3db63a3b3b1..19385ba1ff8 100644
--- a/lib/Target/PTX/PTX.h
+++ b/lib/Target/PTX/PTX.h
@@ -21,6 +21,16 @@ namespace llvm {
   class PTXTargetMachine;
   class FunctionPass;
 
+  namespace PTX {
+    enum StateSpace {
+      GLOBAL = 0, // default to global state space
+      CONSTANT = 1,
+      LOCAL = 2,
+      PARAMETER = 3,
+      SHARED = 4
+    };
+  } // namespace PTX
+
   FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
                                  CodeGenOpt::Level OptLevel);
 
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index cd27fb5d82e..872287eeea8 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -103,11 +103,14 @@ static const char *getInstructionTypeName(const MachineInstr *MI) {
 }
 
 static const char *getStateSpaceName(unsigned addressSpace) {
-  if (addressSpace <= 255)
-    return "global";
-  // TODO Add more state spaces
-
-  llvm_unreachable("Unknown state space");
+  switch (addressSpace) {
+  default: llvm_unreachable("Unknown state space");
+  case PTX::GLOBAL: return "global";
+  case PTX::CONSTANT: return "const";
+  case PTX::LOCAL: return "local";
+  case PTX::PARAMETER: return "param";
+  case PTX::SHARED: return "shared";
+  }
   return NULL;
 }
 
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index 1072103b32b..65386c825eb 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -22,9 +22,47 @@ include "PTXInstrFormats.td"
 //===----------------------------------------------------------------------===//
 
 def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      return PT->getAddressSpace() <= 255;
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::GLOBAL;
+  return false;
+}]>;
+
+def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::CONSTANT;
+  return false;
+}]>;
+
+def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::LOCAL;
+  return false;
+}]>;
+
+def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::PARAMETER;
+  return false;
+}]>;
+
+def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::SHARED;
   return false;
 }]>;
 
@@ -142,6 +180,10 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 }
 
 defm LDg : PTX_LD<"ld.global", RRegs32, load_global>;
+defm LDc : PTX_LD<"ld.const", RRegs32, load_constant>;
+defm LDl : PTX_LD<"ld.local", RRegs32, load_local>;
+defm LDp : PTX_LD<"ld.param", RRegs32, load_parameter>;
+defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>;
 
 ///===- Control Flow Instructions -----------------------------------------===//
 
diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll
index 5ae82a0385b..baafbc2d3d2 100644
--- a/test/CodeGen/PTX/ld.ll
+++ b/test/CodeGen/PTX/ld.ll
@@ -3,6 +3,15 @@
 ;CHECK: .extern .global .s32 array[];
 @array = external global [10 x i32]
 
+;CHECK: .extern .const .s32 array_constant[];
+@array_constant = external addrspace(1) constant [10 x i32]
+
+;CHECK: .extern .local .s32 array_local[];
+@array_local = external addrspace(2) global [10 x i32]
+
+;CHECK: .extern .shared .s32 array_shared[];
+@array_shared = external addrspace(4) global [10 x i32]
+
 define ptx_device i32 @t1(i32* %p) {
 entry:
 ;CHECK: ld.global.s32 r0, [r1];
@@ -27,7 +36,7 @@ entry:
   ret i32 %x
 }
 
-define ptx_device i32 @t4() {
+define ptx_device i32 @t4_global() {
 entry:
 ;CHECK: ld.global.s32 r0, [array];
   %i = getelementptr [10 x i32]* @array, i32 0, i32 0
@@ -35,6 +44,30 @@ entry:
   ret i32 %x
 }
 
+define ptx_device i32 @t4_const() {
+entry:
+;CHECK: ld.const.s32 r0, [array_constant];
+  %i = getelementptr [10 x i32] addrspace(1)* @array_constant, i32 0, i32 0
+  %x = load i32 addrspace(1)* %i
+  ret i32 %x
+}
+
+define ptx_device i32 @t4_local() {
+entry:
+;CHECK: ld.local.s32 r0, [array_local];
+  %i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0
+  %x = load i32 addrspace(2)* %i
+  ret i32 %x
+}
+
+define ptx_device i32 @t4_shared() {
+entry:
+;CHECK: ld.shared.s32 r0, [array_shared];
+  %i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0
+  %x = load i32 addrspace(4)* %i
+  ret i32 %x
+}
+
 define ptx_device i32 @t5() {
 entry:
 ;CHECK: ld.global.s32 r0, [array+4];