[NVPTX] Fix bug in stack code generation caused by MC conversion

We do use a very small set of physical registers, so account for
them in the virtual register encoding between MachineInstr and MC.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187799 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Justin Holewinski 2013-08-06 14:13:31 +00:00
parent 82767327c5
commit a3635eefc7
5 changed files with 84 additions and 51 deletions

View File

@ -41,21 +41,26 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
switch (RCId) {
default: report_fatal_error("Bad virtual register encoding");
case 0:
// This is actually a physical register, so defer to the autogenerated
// register printer
OS << getRegisterName(RegNo);
return;
case 1:
OS << "%p";
break;
case 1:
case 2:
OS << "%rs";
break;
case 2:
case 3:
OS << "%r";
break;
case 3:
case 4:
OS << "%rl";
break;
case 4:
case 5:
OS << "%f";
break;
case 5:
case 6:
OS << "%fl";
break;
}

View File

@ -368,33 +368,39 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
}
unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
unsigned RegNum = RegMap[Reg];
DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
unsigned RegNum = RegMap[Reg];
// Encode the register class in the upper 4 bits
// Must be kept in sync with NVPTXInstPrinter::printRegName
unsigned Ret = 0;
if (RC == &NVPTX::Int1RegsRegClass) {
Ret = 0;
} else if (RC == &NVPTX::Int16RegsRegClass) {
Ret = (1 << 28);
} else if (RC == &NVPTX::Int32RegsRegClass) {
Ret = (2 << 28);
} else if (RC == &NVPTX::Int64RegsRegClass) {
Ret = (3 << 28);
} else if (RC == &NVPTX::Float32RegsRegClass) {
Ret = (4 << 28);
} else if (RC == &NVPTX::Float64RegsRegClass) {
Ret = (5 << 28);
// Encode the register class in the upper 4 bits
// Must be kept in sync with NVPTXInstPrinter::printRegName
unsigned Ret = 0;
if (RC == &NVPTX::Int1RegsRegClass) {
Ret = (1 << 28);
} else if (RC == &NVPTX::Int16RegsRegClass) {
Ret = (2 << 28);
} else if (RC == &NVPTX::Int32RegsRegClass) {
Ret = (3 << 28);
} else if (RC == &NVPTX::Int64RegsRegClass) {
Ret = (4 << 28);
} else if (RC == &NVPTX::Float32RegsRegClass) {
Ret = (5 << 28);
} else if (RC == &NVPTX::Float64RegsRegClass) {
Ret = (6 << 28);
} else {
report_fatal_error("Bad register class");
}
// Insert the vreg number
Ret |= (RegNum & 0x0FFFFFFF);
return Ret;
} else {
report_fatal_error("Bad register class");
// Some special-use registers are actually physical registers.
// Encode this as the register class ID of 0 and the real register ID.
return Reg & 0x0FFFFFFF;
}
// Insert the vreg number
Ret |= (RegNum & 0x0FFFFFFF);
return Ret;
}
MCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,

View File

@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Target/TargetInstrInfo.h"
@ -36,30 +37,24 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
// in the BB, so giving it no debug location.
DebugLoc dl = DebugLoc();
if (tm.getSubtargetImpl()->hasGenericLdSt()) {
// mov %SPL, %depot;
// cvta.local %SP, %SPL;
if (is64bit) {
MachineInstr *MI = BuildMI(
MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
} else {
MachineInstr *MI = BuildMI(
MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
}
MachineRegisterInfo &MRI = MF.getRegInfo();
// mov %SPL, %depot;
// cvta.local %SP, %SPL;
if (is64bit) {
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
MachineInstr *MI = BuildMI(
MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
NVPTX::VRFrame).addReg(LocalReg);
BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
LocalReg).addImm(MF.getFunctionNumber());
} else {
// mov %SP, %depot;
if (is64bit)
BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
NVPTX::VRFrame).addReg(NVPTX::VRDepot);
else
BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
NVPTX::VRFrame).addReg(NVPTX::VRDepot);
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass);
MachineInstr *MI = BuildMI(
MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
NVPTX::VRFrame).addReg(LocalReg);
BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
LocalReg).addImm(MF.getFunctionNumber());
}
}
}

View File

@ -1321,6 +1321,15 @@ def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
"mov.u64 \t$dst, $a;",
[(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
// Get pointer to local stack
// 32-bit form: prints "mov.u32 $d, __local_depot$num;", i.e. it moves the
// address of the symbol __local_depot<num> into an Int32Regs destination.
// $num is an i32 immediate; NVPTXFrameLowering::emitPrologue passes
// MF.getFunctionNumber() for it. The pattern list is empty, so instruction
// selection never produces this instruction; it is only created via BuildMI.
def MOV_DEPOT_ADDR
: NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
"mov.u32 \t$d, __local_depot$num;", []>;
// 64-bit form of the local-depot address move: prints
// "mov.u64 $d, __local_depot$num;" with an Int64Regs destination and an i32
// immediate $num selecting the depot symbol. The pattern list is empty, so
// the instruction is only created explicitly (emitPrologue uses it on the
// is64bit path).
def MOV_DEPOT_ADDR_64
: NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
"mov.u64 \t$d, __local_depot$num;", []>;
// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
let IsSimpleMove=1 in {
def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),

View File

@ -0,0 +1,18 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
; Ensure we access the local stack properly
; PTX32: mov.u32 %r{{[0-9]+}}, __local_depot{{[0-9]+}};
; PTX32: cvta.local.u32 %SP, %r{{[0-9]+}};
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
; PTX32: st.u32 [%SP+0], %r{{[0-9]+}};
; PTX64: mov.u64 %rl{{[0-9]+}}, __local_depot{{[0-9]+}};
; PTX64: cvta.local.u64 %SP, %rl{{[0-9]+}};
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
; PTX64: st.u32 [%SP+0], %r{{[0-9]+}};
; Test input: spills the i32 argument to a stack slot so that the generated
; PTX has to set up and use the local stack, which is what the FileCheck
; patterns earlier in this file match against.
define void @foo(i32 %a) {
; Stack slot that the store below writes the argument into.
%local = alloca i32, align 4
store i32 %a, i32* %local
ret void
}