diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index 1cb8a9a26e4..c7b8aa49370 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -41,21 +41,26 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
   switch (RCId) {
   default: report_fatal_error("Bad virtual register encoding");
   case 0:
+    // Class 0 denotes a physical register (see encodeVirtualRegister in
+    // NVPTXAsmPrinter.cpp), so defer to the autogenerated register printer.
+    OS << getRegisterName(RegNo);
+    return;
+  case 1:
     OS << "%p";
     break;
-  case 1:
+  case 2:
     OS << "%rs";
     break;
-  case 2:
+  case 3:
     OS << "%r";
     break;
-  case 3:
+  case 4:
     OS << "%rl";
     break;
-  case 4:
+  case 5:
     OS << "%f";
     break;
-  case 5:
+  case 6:
     OS << "%fl";
     break;
   }
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index b417d644b63..fb4d4df5250 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -368,33 +368,39 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
 }
 
 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
-  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
 
-  DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
-  unsigned RegNum = RegMap[Reg];
+    DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
+    unsigned RegNum = RegMap[Reg];
 
-  // Encode the register class in the upper 4 bits
-  // Must be kept in sync with NVPTXInstPrinter::printRegName
-  unsigned Ret = 0;
-  if (RC == &NVPTX::Int1RegsRegClass) {
-    Ret = 0;
-  } else if (RC == &NVPTX::Int16RegsRegClass) {
-    Ret = (1 << 28);
-  } else if (RC == &NVPTX::Int32RegsRegClass) {
-    Ret = (2 << 28);
-  } else if (RC == &NVPTX::Int64RegsRegClass) {
-    Ret = (3 << 28);
-  } else if (RC == &NVPTX::Float32RegsRegClass) {
-    Ret = (4 << 28);
-  } else if (RC == &NVPTX::Float64RegsRegClass) {
-    Ret = (5 << 28);
+    // Encode the register class ID (1-6) in the upper 4 bits; ID 0 is kept
+    // for physical registers. Keep in sync with NVPTXInstPrinter::printRegName
+    unsigned Ret = 0;
+    if (RC == &NVPTX::Int1RegsRegClass) {
+      Ret = (1 << 28);
+    } else if (RC == &NVPTX::Int16RegsRegClass) {
+      Ret = (2 << 28);
+    } else if (RC == &NVPTX::Int32RegsRegClass) {
+      Ret = (3 << 28);
+    } else if (RC == &NVPTX::Int64RegsRegClass) {
+      Ret = (4 << 28);
+    } else if (RC == &NVPTX::Float32RegsRegClass) {
+      Ret = (5 << 28);
+    } else if (RC == &NVPTX::Float64RegsRegClass) {
+      Ret = (6 << 28);
+    } else {
+      report_fatal_error("Bad register class");
+    }
+
+    // Insert the vreg number into the low 28 bits
+    Ret |= (RegNum & 0x0FFFFFFF);
+    return Ret;
   } else {
-    report_fatal_error("Bad register class");
+    // Some special-use registers are actually physical registers.
+    // Encode these as class ID 0 plus the real register ID in the low 28 bits.
+    return Reg & 0x0FFFFFFF;
   }
-
-  // Insert the vreg number
-  Ret |= (RegNum & 0x0FFFFFFF);
-  return Ret;
 }
 
 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 6533da5102b..9030584f06f 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MachineLocation.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
@@ -36,30 +37,24 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
     // in the BB, so giving it no debug location.
     DebugLoc dl = DebugLoc();
 
-    if (tm.getSubtargetImpl()->hasGenericLdSt()) {
-      // mov %SPL, %depot;
-      // cvta.local %SP, %SPL;
-      if (is64bit) {
-        MachineInstr *MI = BuildMI(
-            MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
-            NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
-        BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
-                NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
-      } else {
-        MachineInstr *MI = BuildMI(
-            MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
-            NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
-        BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
-                NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
-      }
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+
+    // mov %tmp, __local_depot<N>;   (%tmp is a fresh virtual register)
+    // cvta.local %SP, %tmp;
+    if (is64bit) {
+      unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
+      MachineInstr *MI = BuildMI(
+          MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
+          NVPTX::VRFrame).addReg(LocalReg);
+      BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
+              LocalReg).addImm(MF.getFunctionNumber());
     } else {
-      // mov %SP, %depot;
-      if (is64bit)
-        BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
-                NVPTX::VRFrame).addReg(NVPTX::VRDepot);
-      else
-        BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
-                NVPTX::VRFrame).addReg(NVPTX::VRDepot);
+      unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass);
+      MachineInstr *MI = BuildMI(
+          MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
+          NVPTX::VRFrame).addReg(LocalReg);
+      BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
+              LocalReg).addImm(MF.getFunctionNumber());
     }
   }
 }
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 8ce16e9d1c4..db91eb00529 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1321,6 +1321,15 @@ def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
                      "mov.u64 \t$dst, $a;",
                      [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
 
+// Load the address of the per-function local stack depot (__local_depot<num>)
+def MOV_DEPOT_ADDR
+  : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
+              "mov.u32 \t$d, __local_depot$num;", []>;
+def MOV_DEPOT_ADDR_64
+  : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
+              "mov.u64 \t$d, __local_depot$num;", []>;
+
+
 // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
 let IsSimpleMove=1 in {
 def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
diff --git a/test/CodeGen/NVPTX/local-stack-frame.ll b/test/CodeGen/NVPTX/local-stack-frame.ll
new file mode 100644
index 00000000000..178dff1a5d3
--- /dev/null
+++ b/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+; Ensure the prologue sets up %SP from __local_depot and locals are stored via %SP
+
+; PTX32:        mov.u32         %r{{[0-9]+}}, __local_depot{{[0-9]+}};
+; PTX32:        cvta.local.u32  %SP, %r{{[0-9]+}};
+; PTX32:        ld.param.u32    %r{{[0-9]+}}, [foo_param_0];
+; PTX32:        st.u32  [%SP+0], %r{{[0-9]+}};
+; PTX64:        mov.u64         %rl{{[0-9]+}}, __local_depot{{[0-9]+}};
+; PTX64:        cvta.local.u64  %SP, %rl{{[0-9]+}};
+; PTX64:        ld.param.u32    %r{{[0-9]+}}, [foo_param_0];
+; PTX64:        st.u32  [%SP+0], %r{{[0-9]+}};
+define void @foo(i32 %a) {
+  %local = alloca i32, align 4
+  store i32 %a, i32* %local
+  ret void
+}