// Patch overview. This is free-form leading text placed before the first
// "diff --git" header; the hunks below are unchanged.
//
// InstPrinter/NVPTXInstPrinter.cpp (printRegName hunk):
//   The register-class IDs handled by the switch move up by one:
//   1 -> "%p", 2 -> "%rs", 3 -> "%r", 4 -> "%rl", 5 -> "%f", 6 -> "%fl".
//   ID 0 now writes getRegisterName(RegNo) to OS and returns from the function.
//   Any other ID still reaches report_fatal_error("Bad virtual register encoding").
//   NOTE(review): case 0 uses `return` while the other cases use `break`; the
//   code after the switch is not visible in this hunk - confirm that skipping
//   it is intended for this case.
//
// NVPTXAsmPrinter.cpp (encodeVirtualRegister):
//   When TargetRegisterInfo::isVirtualRegister(Reg) is true, the register class
//   is encoded as 1..6 shifted left by 28 and OR'ed with the low 28 bits of the
//   VRegMapping[RC][Reg] entry; an unrecognised class calls
//   report_fatal_error("Bad register class").
//   Otherwise the function returns Reg & 0x0FFFFFFF, which leaves the upper
//   four bits zero, i.e. class ID 0 - the value printRegName handles in case 0.
//   The two numbering schemes must change together (see the in-code
//   "Must be kept in sync" comment).
//
// NVPTXFrameLowering.cpp (emitPrologue hunk):
//   Adds the MachineRegisterInfo.h include and obtains MRI from MF.getRegInfo().
//   The uses of NVPTX::VRFrameLocal / NVPTX::VRDepot are removed. Instead a
//   virtual register is created (Int64Regs when is64bit, Int32Regs otherwise),
//   a cvta_local_yes_64 / cvta_local_yes is built defining NVPTX::VRFrame from
//   it, and a MOV_DEPOT_ADDR_64 / MOV_DEPOT_ADDR defining that virtual register
//   is built with MF.getFunctionNumber() as its immediate operand.
//   NOTE(review): the removed code only emitted cvta.local when
//   tm.getSubtargetImpl()->hasGenericLdSt() was true and otherwise emitted a
//   plain mov into VRFrame; the new code has no such check, and the added test
//   only runs with -mcpu=sm_20. Confirm the other case is still handled.
//   NOTE(review): the retained comment "mov %SPL, %depot; cvta.local %SP, %SPL;"
//   is schematic - per the added test the printed operands are a %r / %rl
//   register and __local_depot<N>.
//
// NVPTXInstrInfo.td:
//   Adds MOV_DEPOT_ADDR (Int32Regs result) and MOV_DEPOT_ADDR_64 (Int64Regs
//   result). Both take an i32imm operand $num, print
//   "mov.u32/.u64 $d, __local_depot$num;" and have an empty pattern list.
//
// test/CodeGen/NVPTX/local-stack-frame.ll (new file):
//   Runs llc with -march=nvptx and -march=nvptx64 (both -mcpu=sm_20) and checks,
//   under the PTX32 / PTX64 prefixes, for the mov from __local_depot<N>, the
//   cvta.local into %SP, the ld.param of foo_param_0 and the st.u32 to [%SP+0].
//
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp index 1cb8a9a26e4..c7b8aa49370 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp @@ -41,21 +41,26 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { switch (RCId) { default: report_fatal_error("Bad virtual register encoding"); case 0: + // This is actually a physical register, so defer to the autogenerated + // register printer + OS << getRegisterName(RegNo); + return; + case 1: OS << "%p"; break; - case 1: + case 2: OS << "%rs"; break; - case 2: + case 3: OS << "%r"; break; - case 3: + case 4: OS << "%rl"; break; - case 4: + case 5: OS << "%f"; break; - case 5: + case 6: OS << "%fl"; break; } diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index b417d644b63..fb4d4df5250 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -368,33 +368,39 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, } unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { - const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); - DenseMap &RegMap = VRegMapping[RC]; - unsigned RegNum = RegMap[Reg]; + DenseMap &RegMap = VRegMapping[RC]; + unsigned RegNum = RegMap[Reg]; - // Encode the register class in the upper 4 bits - // Must be kept in sync with NVPTXInstPrinter::printRegName - unsigned Ret = 0; - if (RC == &NVPTX::Int1RegsRegClass) { - Ret = 0; - } else if (RC == &NVPTX::Int16RegsRegClass) { - Ret = (1 << 28); - } else if (RC == &NVPTX::Int32RegsRegClass) { - Ret = (2 << 28); - } else if (RC == &NVPTX::Int64RegsRegClass) { - Ret = (3 << 28); - } else if (RC == &NVPTX::Float32RegsRegClass) { - Ret = (4 << 28); - } else if (RC == &NVPTX::Float64RegsRegClass) { - Ret = (5 
<< 28); + // Encode the register class in the upper 4 bits + // Must be kept in sync with NVPTXInstPrinter::printRegName + unsigned Ret = 0; + if (RC == &NVPTX::Int1RegsRegClass) { + Ret = (1 << 28); + } else if (RC == &NVPTX::Int16RegsRegClass) { + Ret = (2 << 28); + } else if (RC == &NVPTX::Int32RegsRegClass) { + Ret = (3 << 28); + } else if (RC == &NVPTX::Int64RegsRegClass) { + Ret = (4 << 28); + } else if (RC == &NVPTX::Float32RegsRegClass) { + Ret = (5 << 28); + } else if (RC == &NVPTX::Float64RegsRegClass) { + Ret = (6 << 28); + } else { + report_fatal_error("Bad register class"); + } + + // Insert the vreg number + Ret |= (RegNum & 0x0FFFFFFF); + return Ret; } else { - report_fatal_error("Bad register class"); + // Some special-use registers are actually physical registers. + // Encode this as the register class ID of 0 and the real register ID. + return Reg & 0x0FFFFFFF; } - - // Insert the vreg number - Ret |= (RegNum & 0x0FFFFFFF); - return Ret; } MCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp index 6533da5102b..9030584f06f 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Target/TargetInstrInfo.h" @@ -36,30 +37,24 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { // in the BB, so giving it no debug location. 
DebugLoc dl = DebugLoc(); - if (tm.getSubtargetImpl()->hasGenericLdSt()) { - // mov %SPL, %depot; - // cvta.local %SP, %SPL; - if (is64bit) { - MachineInstr *MI = BuildMI( - MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64), - NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); - BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr), - NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot); - } else { - MachineInstr *MI = BuildMI( - MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes), - NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); - BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr), - NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot); - } + MachineRegisterInfo &MRI = MF.getRegInfo(); + + // mov %SPL, %depot; + // cvta.local %SP, %SPL; + if (is64bit) { + unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass); + MachineInstr *MI = BuildMI( + MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64), + NVPTX::VRFrame).addReg(LocalReg); + BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64), + LocalReg).addImm(MF.getFunctionNumber()); } else { - // mov %SP, %depot; - if (is64bit) - BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr), - NVPTX::VRFrame).addReg(NVPTX::VRDepot); - else - BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr), - NVPTX::VRFrame).addReg(NVPTX::VRDepot); + unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass); + MachineInstr *MI = BuildMI( + MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes), + NVPTX::VRFrame).addReg(LocalReg); + BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR), + LocalReg).addImm(MF.getFunctionNumber()); } } } diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 8ce16e9d1c4..db91eb00529 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1321,6 +1321,15 @@ def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a), 
"mov.u64 \t$dst, $a;", [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>; +// Get pointer to local stack +def MOV_DEPOT_ADDR + : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num), + "mov.u32 \t$d, __local_depot$num;", []>; +def MOV_DEPOT_ADDR_64 + : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num), + "mov.u64 \t$d, __local_depot$num;", []>; + + // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp let IsSimpleMove=1 in { def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss), diff --git a/test/CodeGen/NVPTX/local-stack-frame.ll b/test/CodeGen/NVPTX/local-stack-frame.ll new file mode 100644 index 00000000000..178dff1a5d3 --- /dev/null +++ b/test/CodeGen/NVPTX/local-stack-frame.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 + +; Ensure we access the local stack properly + +; PTX32: mov.u32 %r{{[0-9]+}}, __local_depot{{[0-9]+}}; +; PTX32: cvta.local.u32 %SP, %r{{[0-9]+}}; +; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo_param_0]; +; PTX32: st.u32 [%SP+0], %r{{[0-9]+}}; +; PTX64: mov.u64 %rl{{[0-9]+}}, __local_depot{{[0-9]+}}; +; PTX64: cvta.local.u64 %SP, %rl{{[0-9]+}}; +; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo_param_0]; +; PTX64: st.u32 [%SP+0], %r{{[0-9]+}}; +define void @foo(i32 %a) { + %local = alloca i32, align 4 + store i32 %a, i32* %local + ret void +}