diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index 3654efd22bd..4ed1849be60 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -86,6 +86,16 @@ namespace {
     //
     std::vector<unsigned> PhysRegsUseOrder;
 
+    // Virt2LastUseMap - Maps each virtual register to its most recent use,
+    // recorded as a (MachineInstr*, operand index) pair when the register is
+    IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor>
+    Virt2LastUseMap;
+
+    std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) {
+      assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      return Virt2LastUseMap[Reg];
+    }
+
     // VirtRegModified - This bitset contains information about which virtual
     // registers need to be spilled back to memory when their registers are
     // scavenged.  If a virtual register has simply been rematerialized, there
@@ -282,8 +292,12 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB,
   
   const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
   
-  if (!isVirtRegModified(VirtReg))
+  if (!isVirtRegModified(VirtReg)) {
     DOUT << " which has not been modified, so no store necessary!";
+    std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
+    if (LastUse.first)
+      LastUse.first->getOperand(LastUse.second).setIsKill();
+  }
 
   // Otherwise, there is a virtual register corresponding to this physical
   // register.  We only need to spill it into its stack slot if it has been
@@ -507,6 +521,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
 
   MF->getRegInfo().setPhysRegUsed(PhysReg);
   MI->getOperand(OpNum).setReg(PhysReg);  // Assign the input register
+  getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
   return MI;
 }
 
@@ -722,6 +737,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
           DestPhysReg = getReg(MBB, MI, DestVirtReg);
         MF->getRegInfo().setPhysRegUsed(DestPhysReg);
         markVirtRegModified(DestVirtReg);
+        getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
         MI->getOperand(i).setReg(DestPhysReg);  // Assign the output register
       }
     }
@@ -823,7 +839,8 @@ bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
   // mapping for all virtual registers
   unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
   Virt2PhysRegMap.grow(LastVirtReg);
-  VirtRegModified.resize(LastVirtReg-MRegisterInfo::FirstVirtualRegister);
+  Virt2LastUseMap.grow(LastVirtReg);
+  VirtRegModified.resize(LastVirtReg+1-MRegisterInfo::FirstVirtualRegister);
 
   // Loop over all of the basic blocks, eliminating virtual register references
   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
@@ -834,6 +851,7 @@ bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
   PhysRegsUsed.clear();
   VirtRegModified.clear();
   Virt2PhysRegMap.clear();
+  Virt2LastUseMap.clear();
   return true;
 }
 
diff --git a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
new file mode 100644
index 00000000000..83ca3e3ac72
--- /dev/null
+++ b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
@@ -0,0 +1,35 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -regalloc=local
+
+define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %solutions, double* %x) {
+entry:
+	%tmp71 = load x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
+	%tmp72 = fdiv x86_fp80 %tmp71, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
+	%tmp73 = add x86_fp80 0xK00000000000000000000, %tmp72		; <x86_fp80> [#uses=1]
+	%tmp7374 = fptrunc x86_fp80 %tmp73 to double		; <double> [#uses=1]
+	store double %tmp7374, double* null, align 8
+	%tmp81 = load double* null, align 8		; <double> [#uses=1]
+	%tmp82 = add double %tmp81, 0x401921FB54442D18		; <double> [#uses=1]
+	%tmp83 = fdiv double %tmp82, 3.000000e+00		; <double> [#uses=1]
+	%tmp84 = call double @cos( double %tmp83 )		; <double> [#uses=1]
+	%tmp85 = mul double 0.000000e+00, %tmp84		; <double> [#uses=1]
+	%tmp8586 = fpext double %tmp85 to x86_fp80		; <x86_fp80> [#uses=1]
+	%tmp87 = load x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
+	%tmp88 = fdiv x86_fp80 %tmp87, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
+	%tmp89 = add x86_fp80 %tmp8586, %tmp88		; <x86_fp80> [#uses=1]
+	%tmp8990 = fptrunc x86_fp80 %tmp89 to double		; <double> [#uses=1]
+	store double %tmp8990, double* null, align 8
+	%tmp97 = load double* null, align 8		; <double> [#uses=1]
+	%tmp98 = add double %tmp97, 0x402921FB54442D18		; <double> [#uses=1]
+	%tmp99 = fdiv double %tmp98, 3.000000e+00		; <double> [#uses=1]
+	%tmp100 = call double @cos( double %tmp99 )		; <double> [#uses=1]
+	%tmp101 = mul double 0.000000e+00, %tmp100		; <double> [#uses=1]
+	%tmp101102 = fpext double %tmp101 to x86_fp80		; <x86_fp80> [#uses=1]
+	%tmp103 = load x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
+	%tmp104 = fdiv x86_fp80 %tmp103, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
+	%tmp105 = add x86_fp80 %tmp101102, %tmp104		; <x86_fp80> [#uses=1]
+	%tmp105106 = fptrunc x86_fp80 %tmp105 to double		; <double> [#uses=1]
+	store double %tmp105106, double* null, align 8
+	ret void
+}
+
+declare double @cos(double)