diff --git a/lib/CodeGen/RegAlloc/LiveRangeInfo.cpp b/lib/CodeGen/RegAlloc/LiveRangeInfo.cpp
index 15584f14a57..c774f935e63 100644
--- a/lib/CodeGen/RegAlloc/LiveRangeInfo.cpp
+++ b/lib/CodeGen/RegAlloc/LiveRangeInfo.cpp
@@ -52,6 +52,9 @@ LiveRangeInfo::~LiveRangeInfo() {
 
 void LiveRangeInfo::unionAndUpdateLRs(LiveRange *L1, LiveRange *L2) {
   assert(L1 != L2 && (!L1->hasSuggestedColor() || !L2->hasSuggestedColor()));
+  assert(! (L1->hasColor() && L2->hasColor()) ||
+         L1->getColor() == L2->getColor());
+
   set_union(*L1, *L2);                   // add elements of L2 to L1
 
   for(ValueSet::iterator L2It = L2->begin(); L2It != L2->end(); ++L2It) {
@@ -61,21 +64,23 @@ void LiveRangeInfo::unionAndUpdateLRs(LiveRange *L1, LiveRange *L2) {
     LiveRangeMap[*L2It] = L1;           // now the elements in L2 should map 
                                         //to L1    
   }
-
-
-  // Now if LROfDef(L1) has a suggested color, it will remain.
-  // But, if LROfUse(L2) has a suggested color, the new range
-  // must have the same color.
-
-  if(L2->hasSuggestedColor())
-    L1->setSuggestedColor(L2->getSuggestedColor());
-
-
+  
+  // set call interference for L1 from L2
   if (L2->isCallInterference())
     L1->setCallInterference();
   
   // add the spill costs
   L1->addSpillCost(L2->getSpillCost());
+
+  // If L2 has a color, give L1 that color.  Note that L1 may have had the same
+  // color or none, but would not have a different color as asserted above.
+  if (L2->hasColor())
+    L1->setColor(L2->getColor());
+
+  // Similarly, if LROfUse(L2) has a suggested color, the new range
+  // must have the same color.
+  if (L2->hasSuggestedColor())
+    L1->setSuggestedColor(L2->getSuggestedColor());
   
   delete L2;                        // delete L2 as it is no longer needed
 }
@@ -174,7 +179,16 @@ void LiveRangeInfo::constructLiveRanges() {
 	  const Value *Def = *OpI;
           bool isCC = (OpI.getMachineOperand().getType()
                        == MachineOperand::MO_CCRegister);
-          createOrAddToLiveRange(Def, isCC);
+          LiveRange* LR = createOrAddToLiveRange(Def, isCC);
+
+          // If the operand has a pre-assigned register,
+          // set it directly in the LiveRange
+          if (OpI.getMachineOperand().hasAllocatedReg()) {
+            unsigned getClassId;
+            LR->setColor(MRI.getClassRegNum(
+                                OpI.getMachineOperand().getAllocatedRegNum(),
+                                getClassId));
+          }
 	}
 
       // iterate over implicit MI operands and create a new LR
@@ -183,7 +197,16 @@ void LiveRangeInfo::constructLiveRanges() {
 	if (MInst->getImplicitOp(i).opIsDefOnly() ||
             MInst->getImplicitOp(i).opIsDefAndUse()) {     
 	  const Value *Def = MInst->getImplicitRef(i);
-          createOrAddToLiveRange(Def, /*isCC*/ false);
+          LiveRange* LR = createOrAddToLiveRange(Def, /*isCC*/ false);
+
+          // If the implicit operand has a pre-assigned register,
+          // set it directly in the LiveRange
+          if (MInst->getImplicitOp(i).hasAllocatedReg()) {
+            unsigned getClassId;
+            LR->setColor(MRI.getClassRegNum(
+                                MInst->getImplicitOp(i).getAllocatedRegNum(),
+                                getClassId));
+          }
 	}
 
     } // for all machine instructions in the BB
@@ -243,6 +266,54 @@ void LiveRangeInfo::suggestRegs4CallRets() {
 
 */
 //---------------------------------------------------------------------------
+
+
+// Returns true if any neighbor of LR's interference-graph node belongs to a
+// live range whose assigned color or suggested color equals `color`.
+// Returns false otherwise (including when the node has no neighbors).
+inline bool InterferesWithColor(const LiveRange& LR, unsigned color)
+{
+  IGNode* lrNode = LR.getUserIGNode();
+  for (unsigned n=0, NN = lrNode->getNumOfNeighbors(); n < NN; n++) {
+    LiveRange *neighLR = lrNode->getAdjIGNode(n)->getParentLR();
+    if (neighLR->hasColor() && neighLR->getColor() == color)
+      return true;       // neighbor is already assigned this color
+    if (neighLR->hasSuggestedColor() && neighLR->getSuggestedColor() == color)
+      return true;       // neighbor has this color as its suggestion
+  }
+  return false;
+}
+
+// Returns true if LROfDef (LR1) and LROfUse (LR2) must NOT be coalesced:
+// (1) Both LRs have suggested colors (should be "different suggested colors"?)
+// (2) Both LR1 and LR2 have colors and the colors are different
+//    (but if the colors are the same, it is definitely safe to coalesce)
+// (3) LR1 has color and LR2 interferes with any LR that has the same color
+// (4) LR2 has color and LR1 interferes with any LR that has the same color
+// Returns false otherwise, i.e. when none of (1)-(4) applies.
+inline bool InterfsPreventCoalescing(const LiveRange& LROfDef,
+                                     const LiveRange& LROfUse)
+{
+  // (1) if both have suggested colors (even identical ones), cannot coalesce
+  if (LROfDef.hasSuggestedColor() && LROfUse.hasSuggestedColor())
+    return true;
+
+  // if neither has a color, nothing more to do.
+  if (! LROfDef.hasColor() && ! LROfUse.hasColor())
+    return false;
+
+  // (2, 3) if LROfDef (LR1) has a color...
+  if (LROfDef.hasColor()) {
+    if (LROfUse.hasColor())
+      return (LROfUse.getColor() != LROfDef.getColor());     // (2)
+    return InterferesWithColor(LROfUse, LROfDef.getColor()); // (3)
+  }
+
+  // (4) else only LROfUse has a color: check if that could interfere
+  return InterferesWithColor(LROfDef, LROfUse.getColor());
+}
+
+
 void LiveRangeInfo::coalesceLRs()  
 {
   if(DEBUG_RA >= RA_DEBUG_LiveRanges) 
@@ -298,10 +369,9 @@ void LiveRangeInfo::coalesceLRs()
                 }
 
 		if (CombinedDegree <= RCOfDef->getNumOfAvailRegs()) {
-		  // if both LRs do not have suggested colors
-		  if (!(LROfDef->hasSuggestedColor() &&  
-                        LROfUse->hasSuggestedColor())) {
-		    
+		  // if both LRs do not have different pre-assigned colors
+		  // and both LRs do not have suggested colors
+                  if (! InterfsPreventCoalescing(*LROfDef, *LROfUse)) {
 		    RCOfDef->mergeIGNodesOfLRs(LROfDef, LROfUse);
 		    unionAndUpdateLRs(LROfDef, LROfUse);
 		  }
@@ -319,10 +389,6 @@ void LiveRangeInfo::coalesceLRs()
     cerr << "\nCoalescing Done!\n";
 }
 
-
-
-
-
 /*--------------------------- Debug code for printing ---------------*/
 
 
diff --git a/lib/CodeGen/RegAlloc/PhyRegAlloc.cpp b/lib/CodeGen/RegAlloc/PhyRegAlloc.cpp
index 409916fb02b..613c16db751 100644
--- a/lib/CodeGen/RegAlloc/PhyRegAlloc.cpp
+++ b/lib/CodeGen/RegAlloc/PhyRegAlloc.cpp
@@ -432,6 +432,13 @@ InsertAfter(MachineInstr* newMI,
   MII = MBB.insert(MII, newMI);
 }
 
+inline void            // Removes the element at MII from MBB.
+DeleteInstruction(MachineBasicBlock& MBB,
+                  MachineBasicBlock::iterator& MII) // in/out: reassigned below
+{
+  MII = MBB.erase(MII); // MII becomes the iterator returned by erase()
+}
+
 inline void
 SubstituteInPlace(MachineInstr* newMI,
                   MachineBasicBlock& MBB,
@@ -483,7 +490,72 @@ AppendInstructions(std::vector<MachineInstr *> &IAft,
 }
 
 
-void PhyRegAlloc::updateMachineCode() {
+void PhyRegAlloc::updateInstruction(MachineInstr* MInst, BasicBlock* BB)
+{
+  unsigned Opcode = MInst->getOpCode();
+  // Three steps for MInst: color operands, fix call/return, insert spill code.
+  // Reset tmp stack positions so they can be reused for each machine instr.
+  MF.getInfo()->popAllTempValues();  
+      
+  // First, set the registers for operands in the machine instruction
+  // if a register was successfully allocated.  Do this first because we
+  // will need to know which registers are already used by this instr'n.
+  // 
+  for (unsigned OpNum=0; OpNum < MInst->getNumOperands(); ++OpNum)
+    {
+      MachineOperand& Op = MInst->getOperand(OpNum);
+      if (Op.getType() ==  MachineOperand::MO_VirtualRegister || 
+          Op.getType() ==  MachineOperand::MO_CCRegister)
+        {
+          const Value *const Val =  Op.getVRegValue();
+          if (const LiveRange* LR = LRI.getLiveRangeForValue(Val))
+            if (LR->hasColor())   // uncolored LRs are handled by the last loop
+              MInst->SetRegForOperand(OpNum,
+                          MRI.getUnifiedRegNum(LR->getRegClass()->getID(),
+                                               LR->getColor()));
+        }
+    } // for each operand
+  
+  // Mark that the operands have been updated.  setRelRegsUsedByThisInst()
+  // is called to find registers used by each MachineInst, and it should not
+  // be used for an instruction until this is done.  This flag just serves
+  // as a sanity check.
+  OperandsColoredMap[MInst] = true;
+
+  // Now insert special instructions (if necessary) for call/return
+  // instructions.  Do this before inserting spill code since some
+  // registers must be used by outgoing call arguments or the return value
+  // of a call, and spill code should not use those registers.
+  //
+  if (TM.getInstrInfo().isCall(Opcode) ||
+      TM.getInstrInfo().isReturn(Opcode)) {
+    AddedInstrns &AI = AddedInstrMap[MInst];
+	
+    if (TM.getInstrInfo().isCall(Opcode))
+      MRI.colorCallArgs(MInst, LRI, &AI, *this, BB);
+    else if (TM.getInstrInfo().isReturn(Opcode))
+      MRI.colorRetValue(MInst, LRI, &AI);
+  }
+      
+  // Now insert spill code for remaining operands not allocated to
+  // registers.  This must be done even for call return instructions
+  // since those are not handled by the special code above.
+  for (unsigned OpNum=0; OpNum < MInst->getNumOperands(); ++OpNum)
+    {
+      MachineOperand& Op = MInst->getOperand(OpNum);
+      if (Op.getType() ==  MachineOperand::MO_VirtualRegister || 
+          Op.getType() ==  MachineOperand::MO_CCRegister)
+        {
+          const Value* Val = Op.getVRegValue();
+          if (const LiveRange *LR = LRI.getLiveRangeForValue(Val))
+            if (! LR->hasColor())   // LR exists but received no register
+              insertCode4SpilledLR(LR, MInst, BB, OpNum);
+        }
+    } // for each operand
+}
+
+void PhyRegAlloc::updateMachineCode()
+{
   // Insert any instructions needed at method entry
   MachineBasicBlock::iterator MII = MF.front().begin();
   PrependInstructions(AddedInstrAtEntry.InstrnsBefore, MF.front(), MII,
@@ -495,8 +567,84 @@ void PhyRegAlloc::updateMachineCode() {
   for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end();
        BBI != BBE; ++BBI) {
 
-    // iterate over all the machine instructions in BB
     MachineBasicBlock &MBB = *BBI;
+
+    // Iterate over all machine instructions in BB and mark operands with
+    // their assigned registers or insert spill code, as appropriate. 
+    // Also, fix operands of call/return instructions.
+    // 
+    for (MachineBasicBlock::iterator MII = MBB.begin(); MII != MBB.end(); ++MII)
+      if (!TM.getInstrInfo().isDummyPhiInstr((*MII)->getOpCode())) // ignore Phis
+        updateInstruction(*MII, MBB.getBasicBlock());
+
+    // Now, move code out of delay slots of branches and returns if needed.
+    // (Also, move "after" code from calls to the last delay slot instruction.)
+    // Moving code out of delay slots is needed in 2 situations:
+    // (1) If this is a branch and it needs instructions inserted after it,
+    //     move any existing instructions out of the delay slot so that the
+    //     instructions can go into the delay slot.  This only supports the
+    //     case that #instrsAfter <= #delay slots.
+    // 
+    // (2) If any instruction in the delay slot needs
+    //     instructions inserted, move it out of the delay slot and before the
+    //     branch because putting code before or after it would be VERY BAD!
+    // 
+    // If the annul bit of the branch is set, neither of these is legal!
+    // If so, we need to handle spill differently but annulling is not yet used.
+    //
+    for (MachineBasicBlock::iterator MII = MBB.begin();
+         MII != MBB.end(); ++MII)
+      if (unsigned delaySlots =
+          TM.getInstrInfo().getNumDelaySlots((*MII)->getOpCode()))
+        { 
+          assert(delaySlots==1 && "Not handling multiple delay slots!");
+
+          MachineInstr *MInst = *MII; 
+          MachineInstr *MDelayInst = *(MII+1); 
+
+          // Check the 2 conditions above:
+          // (1) Does a branch need instructions added after it?
+          // (2) O/w does delay slot instr. need instrns before or after?
+          bool isBranch = (TM.getInstrInfo().isBranch((*MII)->getOpCode()) ||
+                           TM.getInstrInfo().isReturn((*MII)->getOpCode()));
+          bool cond1 = isBranch && AddedInstrMap[MInst].InstrnsAfter.size() > 0;
+          bool cond2 = (AddedInstrMap.count(MDelayInst) ||
+                        AddedInstrMap[MDelayInst].InstrnsAfter.size() > 0);
+
+          if (cond1 || cond2)
+            {
+              // Move delay slot instrn before the preceding branch.
+              // InsertBefore() modifies MII to point to the branch again.
+              assert(((*MII)->getOpCodeFlags() & AnnulFlag) == 0 &&
+                     "FIXME: Annul bit must be turned off here!");
+              InsertBefore(MDelayInst, MBB, MII);
+
+              // In case (1), delete it and don't replace with anything!
+              // Otherwise (i.e., case (2) only) replace it with a NOP.
+              if (cond1) {
+                assert(AddedInstrMap[MInst].InstrnsAfter.size() <= delaySlots &&
+                       "Cannot put more than #delaySlots spill instrns after "
+                       "branch or return! Need to handle spill differently.");
+                DeleteInstruction(MBB, MII); // MII now points to next inst.
+              }
+              else {
+                MachineInstr* nopI =BuildMI(TM.getInstrInfo().getNOPOpCode(),1);
+                SubstituteInPlace(nopI, MBB, MII+1); // replace with NOP
+              }
+            }
+
+          // If this is not a branch or return (probably a call),
+          // the InstrnsAfter, if any, must really go after the last
+          // delay slot.  Move the InstrnsAfter to the instr. in that slot.
+          // We must do this after the previous code because the instructions
+          // in delay slots may get moved out by that code.
+          // 
+          if (!isBranch)
+            move2DelayedInstr(MInst,  *(MII+delaySlots));
+        }
+
+    // Finally iterate over all instructions in BB and insert before/after
+    // 
     for (MachineBasicBlock::iterator MII = MBB.begin();
          MII != MBB.end(); ++MII) {  
 
@@ -507,82 +655,10 @@ void PhyRegAlloc::updateMachineCode() {
       if (TM.getInstrInfo().isDummyPhiInstr(Opcode))
 	continue;
 
-      // Reset tmp stack positions so they can be reused for each machine instr.
-      MF.getInfo()->popAllTempValues();  
-	
-      // Now insert speical instructions (if necessary) for call/return
-      // instructions. 
-      //
-      if (TM.getInstrInfo().isCall(Opcode) ||
-          TM.getInstrInfo().isReturn(Opcode)) {
-        AddedInstrns &AI = AddedInstrMap[MInst];
-	
-        if (TM.getInstrInfo().isCall(Opcode))
-          MRI.colorCallArgs(MInst, LRI, &AI, *this, MBB.getBasicBlock());
-        else if (TM.getInstrInfo().isReturn(Opcode))
-          MRI.colorRetValue(MInst, LRI, &AI);
-      }
-      
-      // Set the registers for operands in the machine instruction
-      // if a register was successfully allocated.  If not, insert
-      // code to spill the register value.
-      // 
-      for (unsigned OpNum=0; OpNum < MInst->getNumOperands(); ++OpNum)
-        {
-          MachineOperand& Op = MInst->getOperand(OpNum);
-          if (Op.getType() ==  MachineOperand::MO_VirtualRegister || 
-              Op.getType() ==  MachineOperand::MO_CCRegister)
-            {
-              const Value *const Val =  Op.getVRegValue();
-          
-              LiveRange *const LR = LRI.getLiveRangeForValue(Val);
-              if (!LR)              // consts or labels will have no live range
-                {
-                  // if register is not allocated, mark register as invalid
-                  if (Op.getAllocatedRegNum() == -1)
-                    MInst->SetRegForOperand(OpNum, MRI.getInvalidRegNum()); 
-                  continue;
-                }
-          
-              if (LR->hasColor())
-                MInst->SetRegForOperand(OpNum,
-                                MRI.getUnifiedRegNum(LR->getRegClass()->getID(),
-                                                     LR->getColor()));
-              else
-                // LR did NOT receive a color (register). Insert spill code.
-                insertCode4SpilledLR(LR, MInst, MBB.getBasicBlock(), OpNum);
-            }
-        } // for each operand
-
       // Now add instructions that the register allocator inserts before/after 
       // this machine instructions (done only for calls/rets/incoming args)
       // We do this here, to ensure that spill for an instruction is inserted
       // closest as possible to an instruction (see above insertCode4Spill...)
-      // 
-      // First, if the instruction in the delay slot of a branch needs
-      // instructions inserted, move it out of the delay slot and before the
-      // branch because putting code before or after it would be VERY BAD!
-      // 
-      unsigned bumpIteratorBy = 0;
-      if (MII != MBB.begin())
-        if (unsigned predDelaySlots =
-            TM.getInstrInfo().getNumDelaySlots((*(MII-1))->getOpCode()))
-          {
-            assert(predDelaySlots==1 && "Not handling multiple delay slots!");
-            if (TM.getInstrInfo().isBranch((*(MII-1))->getOpCode())
-                && (AddedInstrMap.count(MInst) ||
-                    AddedInstrMap[MInst].InstrnsAfter.size() > 0))
-            {
-              // Current instruction is in the delay slot of a branch and it
-              // needs spill code inserted before or after it.
-              // Move it before the preceding branch.
-              InsertBefore(MInst, MBB, --MII);
-              MachineInstr* nopI = BuildMI(TM.getInstrInfo().getNOPOpCode(),1);
-              SubstituteInPlace(nopI, MBB, MII+1); // replace orig with NOP
-              --MII;                  // point to MInst in new location
-              bumpIteratorBy = 2;     // later skip the branch and the NOP!
-            }
-          }
 
       // If there are instructions to be added, *before* this machine
       // instruction, add them now.
@@ -592,39 +668,12 @@ void PhyRegAlloc::updateMachineCode() {
       }
       
       // If there are instructions to be added *after* this machine
-      // instruction, add them now
-      //
+      // instruction, add them now.  All cases with delay slots have been
+      // handled by the delay-slot pass above, so a plain append suffices.
       if (!AddedInstrMap[MInst].InstrnsAfter.empty()) {
-
-	// if there are delay slots for this instruction, the instructions
-	// added after it must really go after the delayed instruction(s)
-	// So, we move the InstrAfter of the current instruction to the 
-	// corresponding delayed instruction
-	if (unsigned delay =
-            TM.getInstrInfo().getNumDelaySlots(MInst->getOpCode())) { 
-          
-          // Delayed instructions are typically branches or calls.  Let's make
-          // sure this is not a branch, otherwise "insert-after" is meaningless,
-          // and should never happen for any reason (spill code, register
-          // restores, etc.).
-          assert(! TM.getInstrInfo().isBranch(MInst->getOpCode()) &&
-                 ! TM.getInstrInfo().isReturn(MInst->getOpCode()) &&
-                 "INTERNAL ERROR: Register allocator should not be inserting "
-                 "any code after a branch or return!");
-
-	  move2DelayedInstr(MInst,  *(MII+delay) );
-	}
-	else {
-	  // Here we can add the "instructions after" to the current
-	  // instruction since there are no delay slots for this instruction
-	  AppendInstructions(AddedInstrMap[MInst].InstrnsAfter, MBB, MII,"");
-	}  // if not delay
+        AppendInstructions(AddedInstrMap[MInst].InstrnsAfter, MBB, MII,"");
       }
 
-      // If we mucked with the instruction order above, adjust the loop iterator
-      if (bumpIteratorBy)
-        MII = MII + bumpIteratorBy;
-
     } // for each machine instruction
   }
 }
@@ -677,6 +726,8 @@ void PhyRegAlloc::insertCode4SpilledLR(const LiveRange *LR,
   // We may need a scratch register to copy the spilled value to/from memory.
   // This may itself have to insert code to free up a scratch register.  
   // Any such code should go before (after) the spill code for a load (store).
+  // The scratch reg is not marked as used because it is only used
+  // for the copy and not used across MInst.
   int scratchRegType = -1;
   int scratchReg = -1;
   if (MRI.regTypeNeedsScratchReg(RegType, scratchRegType))
@@ -684,7 +735,6 @@ void PhyRegAlloc::insertCode4SpilledLR(const LiveRange *LR,
       scratchReg = getUsableUniRegAtMI(scratchRegType, &LVSetBef,
                                        MInst, MIBef, MIAft);
       assert(scratchReg != MRI.getInvalidRegNum());
-      MInst->insertUsedReg(scratchReg); 
     }
   
   if (!isDef || isDefAndUse) {
@@ -788,6 +838,7 @@ int PhyRegAlloc::getUsableUniRegAtMI(const int RegType,
 // Return register number is relative to the register class. NOT
 // unified number
 //----------------------------------------------------------------------------
+
 int PhyRegAlloc::getUnusedUniRegAtMI(RegClass *RC, 
 				  const MachineInstr *MInst, 
 				  const ValueSet *LVSetBef) {
@@ -816,7 +867,7 @@ int PhyRegAlloc::getUnusedUniRegAtMI(RegClass *RC,
   // It is possible that one operand of this MInst was already spilled
   // and it received some register temporarily. If that's the case,
   // it is recorded in machine operand. We must skip such registers.
-
+  // 
   setRelRegsUsedByThisInst(RC, MInst);
 
   for (unsigned c=0; c < NumAvailRegs; c++)   // find first unused color
@@ -857,16 +908,21 @@ int PhyRegAlloc::getUniRegNotUsedByThisInst(RegClass *RC,
 // instructions. Both explicit and implicit operands are set.
 //----------------------------------------------------------------------------
 void PhyRegAlloc::setRelRegsUsedByThisInst(RegClass *RC, 
-                                           const MachineInstr *MInst ) {
+                                           const MachineInstr *MInst )
+{
+  assert(OperandsColoredMap[MInst] == true &&
+         "Illegal to call setRelRegsUsedByThisInst() until colored operands "
+         "are marked for an instruction.");
 
   vector<bool> &IsColorUsedArr = RC->getIsColorUsedArr();
   
   // Add the registers already marked as used by the instruction. 
   // This should include any scratch registers that are used to save
   // values across the instruction (e.g., for saving state register values).
-  const vector<bool> &regsUsed = MInst->getRegsUsed();
-  for (unsigned i = 0, e = regsUsed.size(); i != e; ++i)
-    if (regsUsed[i]) {
+  const std::set<int> &regsUsed = MInst->getRegsUsed();
+  for (std::set<int>::iterator I=regsUsed.begin(), E=regsUsed.end(); I != E; ++I)
+    {
+      int i = *I;
       unsigned classId = 0;
       int classRegNum = MRI.getClassRegNum(i, classId);
       if (RC->getID() == classId)
@@ -876,24 +932,7 @@ void PhyRegAlloc::setRelRegsUsedByThisInst(RegClass *RC,
           IsColorUsedArr[classRegNum] = true;
         }
     }
-  
-  // Now add registers allocated to the live ranges of values used in
-  // the instruction.  These are not yet recorded in the instruction.
-  for (unsigned OpNum=0; OpNum < MInst->getNumOperands(); ++OpNum)
-    {
-      const MachineOperand& Op = MInst->getOperand(OpNum);
-      
-      if (Op.getType() == MachineOperand::MO_VirtualRegister || 
-          Op.getType() == MachineOperand::MO_CCRegister)
-        if (const Value* Val = Op.getVRegValue())
-          if (MRI.getRegClassIDOfType(Val->getType()) == RC->getID())
-            if (Op.getAllocatedRegNum() == -1)
-              if (LiveRange *LROfVal = LRI.getLiveRangeForValue(Val))
-                if (LROfVal->hasColor() )
-                  // this operand is in a LR that received a color
-                  IsColorUsedArr[LROfVal->getColor()] = true;
-    }
-  
+
   // If there are implicit references, mark their allocated regs as well
   // 
   for (unsigned z=0; z < MInst->getNumImplicitRefs(); z++)
@@ -910,22 +949,19 @@ void PhyRegAlloc::setRelRegsUsedByThisInst(RegClass *RC,
 // added after it must really go after the delayed instruction(s).
 // So, we move the InstrAfter of that instruction to the 
 // corresponding delayed instruction using the following method.
-
 //----------------------------------------------------------------------------
-void PhyRegAlloc::move2DelayedInstr(const MachineInstr *OrigMI,
-                                    const MachineInstr *DelayedMI) {
 
+void PhyRegAlloc::move2DelayedInstr(const MachineInstr *OrigMI,
+                                    const MachineInstr *DelayedMI)
+{
   // "added after" instructions of the original instr
   std::vector<MachineInstr *> &OrigAft = AddedInstrMap[OrigMI].InstrnsAfter;
 
-  // "added instructions" of the delayed instr
-  AddedInstrns &DelayAdI = AddedInstrMap[DelayedMI];
-
   // "added after" instructions of the delayed instr
-  std::vector<MachineInstr *> &DelayedAft = DelayAdI.InstrnsAfter;
+  std::vector<MachineInstr *> &DelayedAft =AddedInstrMap[DelayedMI].InstrnsAfter;
 
   // go thru all the "added after instructions" of the original instruction
-  // and append them to the "addded after instructions" of the delayed
+  // and append them to the "added after instructions" of the delayed
   // instructions
   DelayedAft.insert(DelayedAft.end(), OrigAft.begin(), OrigAft.end());
 
@@ -1163,7 +1199,9 @@ void PhyRegAlloc::allocateRegisters()
   //
   allocateStackSpace4SpilledLRs();
 
-  MF.getInfo()->popAllTempValues();  // TODO **Check
+  // Reset the temp. area on the stack before use by the first instruction.
+  // This will also happen after updating each instruction.
+  MF.getInfo()->popAllTempValues();
 
   // color incoming args - if the correct color was not received
   // insert code to copy to the correct register
diff --git a/lib/Target/SparcV9/RegAlloc/LiveRangeInfo.cpp b/lib/Target/SparcV9/RegAlloc/LiveRangeInfo.cpp
index 15584f14a57..c774f935e63 100644
--- a/lib/Target/SparcV9/RegAlloc/LiveRangeInfo.cpp
+++ b/lib/Target/SparcV9/RegAlloc/LiveRangeInfo.cpp
@@ -52,6 +52,9 @@ LiveRangeInfo::~LiveRangeInfo() {
 
 void LiveRangeInfo::unionAndUpdateLRs(LiveRange *L1, LiveRange *L2) {
   assert(L1 != L2 && (!L1->hasSuggestedColor() || !L2->hasSuggestedColor()));
+  assert(! (L1->hasColor() && L2->hasColor()) ||
+         L1->getColor() == L2->getColor());
+
   set_union(*L1, *L2);                   // add elements of L2 to L1
 
   for(ValueSet::iterator L2It = L2->begin(); L2It != L2->end(); ++L2It) {
@@ -61,21 +64,23 @@ void LiveRangeInfo::unionAndUpdateLRs(LiveRange *L1, LiveRange *L2) {
     LiveRangeMap[*L2It] = L1;           // now the elements in L2 should map 
                                         //to L1    
   }
-
-
-  // Now if LROfDef(L1) has a suggested color, it will remain.
-  // But, if LROfUse(L2) has a suggested color, the new range
-  // must have the same color.
-
-  if(L2->hasSuggestedColor())
-    L1->setSuggestedColor(L2->getSuggestedColor());
-
-
+  
+  // set call interference for L1 from L2
   if (L2->isCallInterference())
     L1->setCallInterference();
   
   // add the spill costs
   L1->addSpillCost(L2->getSpillCost());
+
+  // If L2 has a color, give L1 that color.  Note that L1 may have had the same
+  // color or none, but would not have a different color as asserted above.
+  if (L2->hasColor())
+    L1->setColor(L2->getColor());
+
+  // Similarly, if LROfUse(L2) has a suggested color, the new range
+  // must have the same color.
+  if (L2->hasSuggestedColor())
+    L1->setSuggestedColor(L2->getSuggestedColor());
   
   delete L2;                        // delete L2 as it is no longer needed
 }
@@ -174,7 +179,16 @@ void LiveRangeInfo::constructLiveRanges() {
 	  const Value *Def = *OpI;
           bool isCC = (OpI.getMachineOperand().getType()
                        == MachineOperand::MO_CCRegister);
-          createOrAddToLiveRange(Def, isCC);
+          LiveRange* LR = createOrAddToLiveRange(Def, isCC);
+
+          // If the operand has a pre-assigned register,
+          // set it directly in the LiveRange
+          if (OpI.getMachineOperand().hasAllocatedReg()) {
+            unsigned getClassId;
+            LR->setColor(MRI.getClassRegNum(
+                                OpI.getMachineOperand().getAllocatedRegNum(),
+                                getClassId));
+          }
 	}
 
       // iterate over implicit MI operands and create a new LR
@@ -183,7 +197,16 @@ void LiveRangeInfo::constructLiveRanges() {
 	if (MInst->getImplicitOp(i).opIsDefOnly() ||
             MInst->getImplicitOp(i).opIsDefAndUse()) {     
 	  const Value *Def = MInst->getImplicitRef(i);
-          createOrAddToLiveRange(Def, /*isCC*/ false);
+          LiveRange* LR = createOrAddToLiveRange(Def, /*isCC*/ false);
+
+          // If the implicit operand has a pre-assigned register,
+          // set it directly in the LiveRange
+          if (MInst->getImplicitOp(i).hasAllocatedReg()) {
+            unsigned getClassId;
+            LR->setColor(MRI.getClassRegNum(
+                                MInst->getImplicitOp(i).getAllocatedRegNum(),
+                                getClassId));
+          }
 	}
 
     } // for all machine instructions in the BB
@@ -243,6 +266,54 @@ void LiveRangeInfo::suggestRegs4CallRets() {
 
 */
 //---------------------------------------------------------------------------
+
+
+// Returns true if any neighbor of LR's interference-graph node belongs to a
+// live range whose assigned color or suggested color equals `color`.
+// Returns false otherwise (including when the node has no neighbors).
+inline bool InterferesWithColor(const LiveRange& LR, unsigned color)
+{
+  IGNode* lrNode = LR.getUserIGNode();
+  for (unsigned n=0, NN = lrNode->getNumOfNeighbors(); n < NN; n++) {
+    LiveRange *neighLR = lrNode->getAdjIGNode(n)->getParentLR();
+    if (neighLR->hasColor() && neighLR->getColor() == color)
+      return true;       // neighbor is already assigned this color
+    if (neighLR->hasSuggestedColor() && neighLR->getSuggestedColor() == color)
+      return true;       // neighbor has this color as its suggestion
+  }
+  return false;
+}
+
+// Returns true if LROfDef (LR1) and LROfUse (LR2) must NOT be coalesced:
+// (1) Both LRs have suggested colors (should be "different suggested colors"?)
+// (2) Both LR1 and LR2 have colors and the colors are different
+//    (but if the colors are the same, it is definitely safe to coalesce)
+// (3) LR1 has color and LR2 interferes with any LR that has the same color
+// (4) LR2 has color and LR1 interferes with any LR that has the same color
+// Returns false otherwise, i.e. when none of (1)-(4) applies.
+inline bool InterfsPreventCoalescing(const LiveRange& LROfDef,
+                                     const LiveRange& LROfUse)
+{
+  // (1) if both have suggested colors (even identical ones), cannot coalesce
+  if (LROfDef.hasSuggestedColor() && LROfUse.hasSuggestedColor())
+    return true;
+
+  // if neither has a color, nothing more to do.
+  if (! LROfDef.hasColor() && ! LROfUse.hasColor())
+    return false;
+
+  // (2, 3) if LROfDef (LR1) has a color...
+  if (LROfDef.hasColor()) {
+    if (LROfUse.hasColor())
+      return (LROfUse.getColor() != LROfDef.getColor());     // (2)
+    return InterferesWithColor(LROfUse, LROfDef.getColor()); // (3)
+  }
+
+  // (4) else only LROfUse has a color: check if that could interfere
+  return InterferesWithColor(LROfDef, LROfUse.getColor());
+}
+
+
 void LiveRangeInfo::coalesceLRs()  
 {
   if(DEBUG_RA >= RA_DEBUG_LiveRanges) 
@@ -298,10 +369,9 @@ void LiveRangeInfo::coalesceLRs()
                 }
 
 		if (CombinedDegree <= RCOfDef->getNumOfAvailRegs()) {
-		  // if both LRs do not have suggested colors
-		  if (!(LROfDef->hasSuggestedColor() &&  
-                        LROfUse->hasSuggestedColor())) {
-		    
+		  // if both LRs do not have different pre-assigned colors
+		  // and both LRs do not have suggested colors
+                  if (! InterfsPreventCoalescing(*LROfDef, *LROfUse)) {
 		    RCOfDef->mergeIGNodesOfLRs(LROfDef, LROfUse);
 		    unionAndUpdateLRs(LROfDef, LROfUse);
 		  }
@@ -319,10 +389,6 @@ void LiveRangeInfo::coalesceLRs()
     cerr << "\nCoalescing Done!\n";
 }
 
-
-
-
-
 /*--------------------------- Debug code for printing ---------------*/
 
 
diff --git a/lib/Target/SparcV9/RegAlloc/PhyRegAlloc.cpp b/lib/Target/SparcV9/RegAlloc/PhyRegAlloc.cpp
index 409916fb02b..613c16db751 100644
--- a/lib/Target/SparcV9/RegAlloc/PhyRegAlloc.cpp
+++ b/lib/Target/SparcV9/RegAlloc/PhyRegAlloc.cpp
@@ -432,6 +432,13 @@ InsertAfter(MachineInstr* newMI,
   MII = MBB.insert(MII, newMI);
 }
 
+inline void
+DeleteInstruction(MachineBasicBlock& MBB,
+                  MachineBasicBlock::iterator& MII)
+{
+  MII = MBB.erase(MII);
+}
+
 inline void
 SubstituteInPlace(MachineInstr* newMI,
                   MachineBasicBlock& MBB,
@@ -483,7 +490,72 @@ AppendInstructions(std::vector<MachineInstr *> &IAft,
 }
 
 
-void PhyRegAlloc::updateMachineCode() {
+void PhyRegAlloc::updateInstruction(MachineInstr* MInst, BasicBlock* BB)
+{
+  unsigned Opcode = MInst->getOpCode();
+  
+  // Reset tmp stack positions so they can be reused for each machine instr.
+  MF.getInfo()->popAllTempValues();  
+      
+  // First, set the registers for operands in the machine instruction
+  // if a register was successfully allocated.  Do this first because we
+  // will need to know which registers are already used by this instr'n.
+  // 
+  for (unsigned OpNum=0; OpNum < MInst->getNumOperands(); ++OpNum)
+    {
+      MachineOperand& Op = MInst->getOperand(OpNum);
+      if (Op.getType() ==  MachineOperand::MO_VirtualRegister || 
+          Op.getType() ==  MachineOperand::MO_CCRegister)
+        {
+          const Value *const Val =  Op.getVRegValue();
+          if (const LiveRange* LR = LRI.getLiveRangeForValue(Val))
+            if (LR->hasColor())
+              MInst->SetRegForOperand(OpNum,
+                          MRI.getUnifiedRegNum(LR->getRegClass()->getID(),
+                                               LR->getColor()));
+        }
+    } // for each operand
+  
+  // Mark that the operands have been updated.  setRelRegsUsedByThisInst()
+  // is called to find registers used by each MachineInst, and it should not
+  // be used for an instruction until this is done.  This flag just serves
+  // as a sanity check.
+  OperandsColoredMap[MInst] = true;
+
+  // Now insert special instructions (if necessary) for call/return
+  // instructions.  Do this before inserting spill code since some
+  // registers must be used by outgoing call arguments or the return value
+  // of a call, and spill code should not use those registers.
+  //
+  if (TM.getInstrInfo().isCall(Opcode) ||
+      TM.getInstrInfo().isReturn(Opcode)) {
+    AddedInstrns &AI = AddedInstrMap[MInst];
+	
+    if (TM.getInstrInfo().isCall(Opcode))
+      MRI.colorCallArgs(MInst, LRI, &AI, *this, BB);
+    else if (TM.getInstrInfo().isReturn(Opcode))
+      MRI.colorRetValue(MInst, LRI, &AI);
+  }
+      
+  // Now insert spill code for remaining operands not allocated to
+  // registers.  This must be done even for call/return instructions
+  // since those are not handled by the special code above.
+  for (unsigned OpNum=0; OpNum < MInst->getNumOperands(); ++OpNum)
+    {
+      MachineOperand& Op = MInst->getOperand(OpNum);
+      if (Op.getType() ==  MachineOperand::MO_VirtualRegister || 
+          Op.getType() ==  MachineOperand::MO_CCRegister)
+        {
+          const Value* Val = Op.getVRegValue();
+          if (const LiveRange *LR = LRI.getLiveRangeForValue(Val))
+            if (! LR->hasColor())
+              insertCode4SpilledLR(LR, MInst, BB, OpNum);
+        }
+    } // for each operand
+}
+
+void PhyRegAlloc::updateMachineCode()
+{
   // Insert any instructions needed at method entry
   MachineBasicBlock::iterator MII = MF.front().begin();
   PrependInstructions(AddedInstrAtEntry.InstrnsBefore, MF.front(), MII,
@@ -495,8 +567,84 @@ void PhyRegAlloc::updateMachineCode() {
   for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end();
        BBI != BBE; ++BBI) {
 
-    // iterate over all the machine instructions in BB
     MachineBasicBlock &MBB = *BBI;
+
+    // Iterate over all machine instructions in BB and mark operands with
+    // their assigned registers or insert spill code, as appropriate. 
+    // Also, fix operands of call/return instructions.
+    // 
+    for (MachineBasicBlock::iterator MII = MBB.begin(); MII != MBB.end(); ++MII)
+      if (!TM.getInstrInfo().isDummyPhiInstr((*MII)->getOpCode())) // ignore Phis
+        updateInstruction(*MII, MBB.getBasicBlock());
+
+    // Now, move code out of delay slots of branches and returns if needed.
+    // (Also, move "after" code from calls to the last delay slot instruction.)
+    // Moving code out of delay slots is needed in 2 situations:
+    // (1) If this is a branch and it needs instructions inserted after it,
+    //     move any existing instructions out of the delay slot so that the
+    //     instructions can go into the delay slot.  This only supports the
+    //     case that #instrsAfter <= #delay slots.
+    // 
+    // (2) If any instruction in the delay slot needs
+    //     instructions inserted, move it out of the delay slot and before the
+    //     branch because putting code before or after it would be VERY BAD!
+    // 
+    // If the annul bit of the branch is set, neither of these is legal!
+    // If so, we need to handle spill differently but annulling is not yet used.
+    //
+    for (MachineBasicBlock::iterator MII = MBB.begin();
+         MII != MBB.end(); ++MII)
+      if (unsigned delaySlots =
+          TM.getInstrInfo().getNumDelaySlots((*MII)->getOpCode()))
+        { 
+          assert(delaySlots==1 && "Not handling multiple delay slots!");
+
+          MachineInstr *MInst = *MII; 
+          MachineInstr *MDelayInst = *(MII+1); 
+
+          // Check the 2 conditions above:
+          // (1) Does a branch need instructions added after it?
+          // (2) O/w does delay slot instr. need instrns before or after?
+          bool isBranch = (TM.getInstrInfo().isBranch((*MII)->getOpCode()) ||
+                           TM.getInstrInfo().isReturn((*MII)->getOpCode()));
+          bool cond1 = isBranch && AddedInstrMap[MInst].InstrnsAfter.size() > 0;
+          bool cond2 = (AddedInstrMap.count(MDelayInst) ||
+                        AddedInstrMap[MDelayInst].InstrnsAfter.size() > 0);
+
+          if (cond1 || cond2)
+            {
+              // Move delay slot instrn before the preceding branch.
+              // InsertBefore() modifies MII to point to the branch again.
+              assert(((*MII)->getOpCodeFlags() & AnnulFlag) == 0 &&
+                     "FIXME: Annul bit must be turned off here!");
+              InsertBefore(MDelayInst, MBB, MII);
+
+              // In case (1), delete it and don't replace with anything!
+              // Otherwise (i.e., case (2) only) replace it with a NOP.
+              if (cond1) {
+                assert(AddedInstrMap[MInst].InstrnsAfter.size() <= delaySlots &&
+                       "Cannot put more than #delaySlots spill instrns after "
+                       "branch or return! Need to handle spill differently.");
+                DeleteInstruction(MBB, MII); // MII now points to next inst.
+              }
+              else {
+                MachineInstr* nopI =BuildMI(TM.getInstrInfo().getNOPOpCode(),1);
+                SubstituteInPlace(nopI, MBB, MII+1); // replace with NOP
+              }
+            }
+
+          // If this is not a branch or return (probably a call),
+          // the InstrnsAfter, if any, must really go after the last
+          // delay slot.  Move the InstrnsAfter to the instr. in that slot.
+          // We must do this after the previous code because the instructions
+          // in delay slots may get moved out by that code.
+          // 
+          if (!isBranch)
+            move2DelayedInstr(MInst,  *(MII+delaySlots));
+        }
+
+    // Finally iterate over all instructions in BB and insert before/after
+    // 
     for (MachineBasicBlock::iterator MII = MBB.begin();
          MII != MBB.end(); ++MII) {  
 
@@ -507,82 +655,10 @@ void PhyRegAlloc::updateMachineCode() {
       if (TM.getInstrInfo().isDummyPhiInstr(Opcode))
 	continue;
 
-      // Reset tmp stack positions so they can be reused for each machine instr.
-      MF.getInfo()->popAllTempValues();  
-	
-      // Now insert speical instructions (if necessary) for call/return
-      // instructions. 
-      //
-      if (TM.getInstrInfo().isCall(Opcode) ||
-          TM.getInstrInfo().isReturn(Opcode)) {
-        AddedInstrns &AI = AddedInstrMap[MInst];
-	
-        if (TM.getInstrInfo().isCall(Opcode))
-          MRI.colorCallArgs(MInst, LRI, &AI, *this, MBB.getBasicBlock());
-        else if (TM.getInstrInfo().isReturn(Opcode))
-          MRI.colorRetValue(MInst, LRI, &AI);
-      }
-      
-      // Set the registers for operands in the machine instruction
-      // if a register was successfully allocated.  If not, insert
-      // code to spill the register value.
-      // 
-      for (unsigned OpNum=0; OpNum < MInst->getNumOperands(); ++OpNum)
-        {
-          MachineOperand& Op = MInst->getOperand(OpNum);
-          if (Op.getType() ==  MachineOperand::MO_VirtualRegister || 
-              Op.getType() ==  MachineOperand::MO_CCRegister)
-            {
-              const Value *const Val =  Op.getVRegValue();
-          
-              LiveRange *const LR = LRI.getLiveRangeForValue(Val);
-              if (!LR)              // consts or labels will have no live range
-                {
-                  // if register is not allocated, mark register as invalid
-                  if (Op.getAllocatedRegNum() == -1)
-                    MInst->SetRegForOperand(OpNum, MRI.getInvalidRegNum()); 
-                  continue;
-                }
-          
-              if (LR->hasColor())
-                MInst->SetRegForOperand(OpNum,
-                                MRI.getUnifiedRegNum(LR->getRegClass()->getID(),
-                                                     LR->getColor()));
-              else
-                // LR did NOT receive a color (register). Insert spill code.
-                insertCode4SpilledLR(LR, MInst, MBB.getBasicBlock(), OpNum);
-            }
-        } // for each operand
-
       // Now add instructions that the register allocator inserts before/after 
       // this machine instructions (done only for calls/rets/incoming args)
       // We do this here, to ensure that spill for an instruction is inserted
       // closest as possible to an instruction (see above insertCode4Spill...)
-      // 
-      // First, if the instruction in the delay slot of a branch needs
-      // instructions inserted, move it out of the delay slot and before the
-      // branch because putting code before or after it would be VERY BAD!
-      // 
-      unsigned bumpIteratorBy = 0;
-      if (MII != MBB.begin())
-        if (unsigned predDelaySlots =
-            TM.getInstrInfo().getNumDelaySlots((*(MII-1))->getOpCode()))
-          {
-            assert(predDelaySlots==1 && "Not handling multiple delay slots!");
-            if (TM.getInstrInfo().isBranch((*(MII-1))->getOpCode())
-                && (AddedInstrMap.count(MInst) ||
-                    AddedInstrMap[MInst].InstrnsAfter.size() > 0))
-            {
-              // Current instruction is in the delay slot of a branch and it
-              // needs spill code inserted before or after it.
-              // Move it before the preceding branch.
-              InsertBefore(MInst, MBB, --MII);
-              MachineInstr* nopI = BuildMI(TM.getInstrInfo().getNOPOpCode(),1);
-              SubstituteInPlace(nopI, MBB, MII+1); // replace orig with NOP
-              --MII;                  // point to MInst in new location
-              bumpIteratorBy = 2;     // later skip the branch and the NOP!
-            }
-          }
 
       // If there are instructions to be added, *before* this machine
       // instruction, add them now.
@@ -592,39 +668,12 @@ void PhyRegAlloc::updateMachineCode() {
       }
       
       // If there are instructions to be added *after* this machine
-      // instruction, add them now
-      //
+      // instruction, add them now.  All cases with delay slots have been
+      // handled by the delay-slot loop above, so just append them here.
       if (!AddedInstrMap[MInst].InstrnsAfter.empty()) {
-
-	// if there are delay slots for this instruction, the instructions
-	// added after it must really go after the delayed instruction(s)
-	// So, we move the InstrAfter of the current instruction to the 
-	// corresponding delayed instruction
-	if (unsigned delay =
-            TM.getInstrInfo().getNumDelaySlots(MInst->getOpCode())) { 
-          
-          // Delayed instructions are typically branches or calls.  Let's make
-          // sure this is not a branch, otherwise "insert-after" is meaningless,
-          // and should never happen for any reason (spill code, register
-          // restores, etc.).
-          assert(! TM.getInstrInfo().isBranch(MInst->getOpCode()) &&
-                 ! TM.getInstrInfo().isReturn(MInst->getOpCode()) &&
-                 "INTERNAL ERROR: Register allocator should not be inserting "
-                 "any code after a branch or return!");
-
-	  move2DelayedInstr(MInst,  *(MII+delay) );
-	}
-	else {
-	  // Here we can add the "instructions after" to the current
-	  // instruction since there are no delay slots for this instruction
-	  AppendInstructions(AddedInstrMap[MInst].InstrnsAfter, MBB, MII,"");
-	}  // if not delay
+        AppendInstructions(AddedInstrMap[MInst].InstrnsAfter, MBB, MII,"");
       }
 
-      // If we mucked with the instruction order above, adjust the loop iterator
-      if (bumpIteratorBy)
-        MII = MII + bumpIteratorBy;
-
     } // for each machine instruction
   }
 }
@@ -677,6 +726,8 @@ void PhyRegAlloc::insertCode4SpilledLR(const LiveRange *LR,
   // We may need a scratch register to copy the spilled value to/from memory.
   // This may itself have to insert code to free up a scratch register.  
   // Any such code should go before (after) the spill code for a load (store).
+  // The scratch reg is not marked as used because it is only used
+  // for the copy and not used across MInst.
   int scratchRegType = -1;
   int scratchReg = -1;
   if (MRI.regTypeNeedsScratchReg(RegType, scratchRegType))
@@ -684,7 +735,6 @@ void PhyRegAlloc::insertCode4SpilledLR(const LiveRange *LR,
       scratchReg = getUsableUniRegAtMI(scratchRegType, &LVSetBef,
                                        MInst, MIBef, MIAft);
       assert(scratchReg != MRI.getInvalidRegNum());
-      MInst->insertUsedReg(scratchReg); 
     }
   
   if (!isDef || isDefAndUse) {
@@ -788,6 +838,7 @@ int PhyRegAlloc::getUsableUniRegAtMI(const int RegType,
 // Return register number is relative to the register class. NOT
 // unified number
 //----------------------------------------------------------------------------
+
 int PhyRegAlloc::getUnusedUniRegAtMI(RegClass *RC, 
 				  const MachineInstr *MInst, 
 				  const ValueSet *LVSetBef) {
@@ -816,7 +867,7 @@ int PhyRegAlloc::getUnusedUniRegAtMI(RegClass *RC,
   // It is possible that one operand of this MInst was already spilled
   // and it received some register temporarily. If that's the case,
   // it is recorded in machine operand. We must skip such registers.
-
+  // 
   setRelRegsUsedByThisInst(RC, MInst);
 
   for (unsigned c=0; c < NumAvailRegs; c++)   // find first unused color
@@ -857,16 +908,21 @@ int PhyRegAlloc::getUniRegNotUsedByThisInst(RegClass *RC,
 // instructions. Both explicit and implicit operands are set.
 //----------------------------------------------------------------------------
 void PhyRegAlloc::setRelRegsUsedByThisInst(RegClass *RC, 
-                                           const MachineInstr *MInst ) {
+                                           const MachineInstr *MInst )
+{
+  assert(OperandsColoredMap[MInst] == true &&
+         "Illegal to call setRelRegsUsedByThisInst() until colored operands "
+         "are marked for an instruction.");
 
   vector<bool> &IsColorUsedArr = RC->getIsColorUsedArr();
   
   // Add the registers already marked as used by the instruction. 
   // This should include any scratch registers that are used to save
   // values across the instruction (e.g., for saving state register values).
-  const vector<bool> &regsUsed = MInst->getRegsUsed();
-  for (unsigned i = 0, e = regsUsed.size(); i != e; ++i)
-    if (regsUsed[i]) {
+  const std::set<int> &regsUsed = MInst->getRegsUsed();
+  for (std::set<int>::iterator I=regsUsed.begin(), E=regsUsed.end(); I != E; ++I)
+    {
+      int i = *I;
       unsigned classId = 0;
       int classRegNum = MRI.getClassRegNum(i, classId);
       if (RC->getID() == classId)
@@ -876,24 +932,7 @@ void PhyRegAlloc::setRelRegsUsedByThisInst(RegClass *RC,
           IsColorUsedArr[classRegNum] = true;
         }
     }
-  
-  // Now add registers allocated to the live ranges of values used in
-  // the instruction.  These are not yet recorded in the instruction.
-  for (unsigned OpNum=0; OpNum < MInst->getNumOperands(); ++OpNum)
-    {
-      const MachineOperand& Op = MInst->getOperand(OpNum);
-      
-      if (Op.getType() == MachineOperand::MO_VirtualRegister || 
-          Op.getType() == MachineOperand::MO_CCRegister)
-        if (const Value* Val = Op.getVRegValue())
-          if (MRI.getRegClassIDOfType(Val->getType()) == RC->getID())
-            if (Op.getAllocatedRegNum() == -1)
-              if (LiveRange *LROfVal = LRI.getLiveRangeForValue(Val))
-                if (LROfVal->hasColor() )
-                  // this operand is in a LR that received a color
-                  IsColorUsedArr[LROfVal->getColor()] = true;
-    }
-  
+
   // If there are implicit references, mark their allocated regs as well
   // 
   for (unsigned z=0; z < MInst->getNumImplicitRefs(); z++)
@@ -910,22 +949,19 @@ void PhyRegAlloc::setRelRegsUsedByThisInst(RegClass *RC,
 // added after it must really go after the delayed instruction(s).
 // So, we move the InstrAfter of that instruction to the 
 // corresponding delayed instruction using the following method.
-
 //----------------------------------------------------------------------------
-void PhyRegAlloc::move2DelayedInstr(const MachineInstr *OrigMI,
-                                    const MachineInstr *DelayedMI) {
 
+void PhyRegAlloc::move2DelayedInstr(const MachineInstr *OrigMI,
+                                    const MachineInstr *DelayedMI)
+{
   // "added after" instructions of the original instr
   std::vector<MachineInstr *> &OrigAft = AddedInstrMap[OrigMI].InstrnsAfter;
 
-  // "added instructions" of the delayed instr
-  AddedInstrns &DelayAdI = AddedInstrMap[DelayedMI];
-
   // "added after" instructions of the delayed instr
-  std::vector<MachineInstr *> &DelayedAft = DelayAdI.InstrnsAfter;
+  std::vector<MachineInstr *> &DelayedAft =AddedInstrMap[DelayedMI].InstrnsAfter;
 
   // go thru all the "added after instructions" of the original instruction
-  // and append them to the "addded after instructions" of the delayed
+  // and append them to the "added after instructions" of the delayed
   // instructions
   DelayedAft.insert(DelayedAft.end(), OrigAft.begin(), OrigAft.end());
 
@@ -1163,7 +1199,9 @@ void PhyRegAlloc::allocateRegisters()
   //
   allocateStackSpace4SpilledLRs();
 
-  MF.getInfo()->popAllTempValues();  // TODO **Check
+  // Reset the temp. area on the stack before use by the first instruction.
+  // This will also happen after updating each instruction.
+  MF.getInfo()->popAllTempValues();
 
   // color incoming args - if the correct color was not received
   // insert code to copy to the correct register
diff --git a/lib/Target/SparcV9/SparcV9InstrInfo.cpp b/lib/Target/SparcV9/SparcV9InstrInfo.cpp
index 5111246073e..55f7ece1f5d 100644
--- a/lib/Target/SparcV9/SparcV9InstrInfo.cpp
+++ b/lib/Target/SparcV9/SparcV9InstrInfo.cpp
@@ -270,8 +270,7 @@ CreateUIntSetInstruction(const TargetMachine& target,
     CreateSETSWConst(target, (int32_t) C, dest, mvec);
   } else if (C > lo32) {
     // C does not fit in 32 bits
-    TmpInstruction* tmpReg = new TmpInstruction(Type::IntTy);
-    mcfi.addTemp(tmpReg);
+    TmpInstruction* tmpReg = new TmpInstruction(mcfi, Type::IntTy);
     CreateSETXConst(target, C, tmpReg, dest, mvec);
   }
 }
@@ -424,8 +423,7 @@ UltraSparcInstrInfo::CreateCodeToLoadConst(const TargetMachine& target,
 
   if (isa<GlobalValue>(val)) {
       TmpInstruction* tmpReg =
-        new TmpInstruction(PointerType::get(val->getType()), val);
-      mcfi.addTemp(tmpReg);
+        new TmpInstruction(mcfi, PointerType::get(val->getType()), val);
       CreateSETXLabel(target, val, tmpReg, dest, mvec);
   } else if (valType->isIntegral()) {
     bool isValidConstant;
@@ -468,13 +466,11 @@ UltraSparcInstrInfo::CreateCodeToLoadConst(const TargetMachine& target,
       
     // First, create a tmp register to be used by the SETX sequence.
     TmpInstruction* tmpReg =
-      new TmpInstruction(PointerType::get(val->getType()), val);
-    mcfi.addTemp(tmpReg);
+      new TmpInstruction(mcfi, PointerType::get(val->getType()), val);
       
     // Create another TmpInstruction for the address register
     TmpInstruction* addrReg =
-      new TmpInstruction(PointerType::get(val->getType()), val);
-    mcfi.addTemp(addrReg);
+      new TmpInstruction(mcfi, PointerType::get(val->getType()), val);
       
     // Put the address (a symbolic name) into a register
     CreateSETXLabel(target, val, tmpReg, addrReg, mvec);
@@ -526,7 +522,7 @@ UltraSparcInstrInfo::CreateCodeToCopyIntToFloat(const TargetMachine& target,
   Value* storeVal = val;
   if (srcSize < target.getTargetData().getTypeSize(Type::FloatTy)) {
     // sign- or zero-extend respectively
-    storeVal = new TmpInstruction(storeType, val);
+    storeVal = new TmpInstruction(mcfi, storeType, val);
     if (val->getType()->isSigned())
       CreateSignExtensionInstructions(target, F, val, storeVal, 8*srcSize,
                                       mvec, mcfi);
@@ -552,8 +548,8 @@ UltraSparcInstrInfo::CreateCodeToCopyIntToFloat(const TargetMachine& target,
 // Similarly, create an instruction sequence to copy an FP register
 // `val' to an integer register `dest' by copying to memory and back.
 // The generated instructions are returned in `mvec'.
-// Any temp. registers (TmpInstruction) created are recorded in mcfi.
-// Any stack space required is allocated via MachineFunction.
+// Any temp. virtual registers (TmpInstruction) created are recorded in mcfi.
+// Temporary stack space required is allocated via MachineFunction.
 // 
 void
 UltraSparcInstrInfo::CreateCodeToCopyFloatToInt(const TargetMachine& target,
@@ -570,6 +566,10 @@ UltraSparcInstrInfo::CreateCodeToCopyFloatToInt(const TargetMachine& target,
   assert((destTy->isIntegral() || isa<PointerType>(destTy))
          && "Dest type must be integer, bool or pointer");
 
+  // FIXME: For now, we allocate permanent space because the stack frame
+  // manager does not allow locals to be allocated (e.g., for alloca) after
+  // a temp is allocated!
+  // 
   int offset = MachineFunction::get(F).getInfo()->allocateLocalVar(val); 
 
   unsigned FPReg = target.getRegInfo().getFramePointer();
@@ -639,14 +639,20 @@ UltraSparcInstrInfo::CreateCopyInstructionsByType(const TargetMachine& target,
     target.getInstrInfo().CreateCodeToLoadConst(target, F, src, dest,
                                                 mvec, mcfi);
   } else { 
-    // Create an add-with-0 instruction of the appropriate type.
-    // Make `src' the second operand, in case it is a constant
-    // Use (unsigned long) 0 for a NULL pointer value.
+    // Create a reg-to-reg copy instruction for the given type:
+    // -- For FP values, create a FMOVS or FMOVD instruction
+    // -- For non-FP values, create an add-with-0 instruction (opCode as above)
+    // Make `src' the second operand, in case it is a small constant!
     // 
-    const Type* Ty =isa<PointerType>(resultType) ? Type::ULongTy : resultType;
-    MachineInstr* MI =
-      BuildMI(opCode, 3).addReg(Constant::getNullValue(Ty))
-      .addReg(src).addRegDef(dest);
+    MachineInstr* MI;
+    if (resultType->isFloatingPoint())
+      MI = (BuildMI(resultType == Type::FloatTy? V9::FMOVS : V9::FMOVD, 2)
+            .addReg(src).addRegDef(dest));
+    else {
+        const Type* Ty =isa<PointerType>(resultType)? Type::ULongTy :resultType;
+        MI = (BuildMI(opCode, 3)
+              .addSImm((int64_t) 0).addReg(src).addRegDef(dest));
+    }
     mvec.push_back(MI);
   }
 }
@@ -670,9 +676,8 @@ CreateBitExtensionInstructions(bool signExtend,
 
   if (numLowBits < 32) {
     // SLL is needed since operand size is < 32 bits.
-    TmpInstruction *tmpI = new TmpInstruction(destVal->getType(),
+    TmpInstruction *tmpI = new TmpInstruction(mcfi, destVal->getType(),
                                               srcVal, destVal, "make32");
-    mcfi.addTemp(tmpI);
     mvec.push_back(BuildMI(V9::SLLXi6, 3).addReg(srcVal)
                    .addZImm(32-numLowBits).addRegDef(tmpI));
     srcVal = tmpI;
diff --git a/lib/Target/SparcV9/SparcV9InstrSelection.cpp b/lib/Target/SparcV9/SparcV9InstrSelection.cpp
index 0670b4971ff..93e121a9c4c 100644
--- a/lib/Target/SparcV9/SparcV9InstrSelection.cpp
+++ b/lib/Target/SparcV9/SparcV9InstrSelection.cpp
@@ -126,12 +126,15 @@ FoldGetElemChain(InstrTreeNode* ptrNode, std::vector<Value*>& chainIdxVec,
       // Get pointer value out of ptrChild.
       ptrVal = gepInst->getPointerOperand();
 
-      // Remember if it has leading zero index: it will be discarded later.
-      lastInstHasLeadingNonZero = ! IsZero(*firstIdx);
-
       // Insert its index vector at the start, skipping any leading [0]
-      chainIdxVec.insert(chainIdxVec.begin(),
-                         firstIdx + !lastInstHasLeadingNonZero, lastIdx);
+      // Remember the old size to check if anything was inserted.
+      unsigned oldSize = chainIdxVec.size();
+      int firstIsZero = IsZero(*firstIdx);
+      chainIdxVec.insert(chainIdxVec.begin(), firstIdx + firstIsZero, lastIdx);
+
+      // Remember if it has leading zero index: it will be discarded later.
+      if (oldSize < chainIdxVec.size())
+        lastInstHasLeadingNonZero = !firstIsZero;
 
       // Mark the folded node so no code is generated for it.
       ((InstructionNode*) ptrChild)->markFoldedIntoParent();
@@ -143,7 +146,9 @@ FoldGetElemChain(InstrTreeNode* ptrNode, std::vector<Value*>& chainIdxVec,
   }
 
   // If the first getElementPtr instruction had a leading [0], add it back.
-  // Note that this instruction is the *last* one successfully folded above.
+  // Note that this instruction is the *last* one that was successfully
+  // folded *and* contributed any indices, in the loop above.
+  // 
   if (ptrVal && ! lastInstHasLeadingNonZero) 
     chainIdxVec.insert(chainIdxVec.begin(), ConstantSInt::get(Type::LongTy,0));
 
@@ -355,7 +360,8 @@ ChooseBFpccInstruction(const InstructionNode* instrNode,
 // TmpInstructions will be freed along with the rest of the Function anyway.
 // 
 static TmpInstruction*
-GetTmpForCC(Value* boolVal, const Function *F, const Type* ccType)
+GetTmpForCC(Value* boolVal, const Function *F, const Type* ccType,
+            MachineCodeForInstruction& mcfi)
 {
   typedef hash_map<const Value*, TmpInstruction*> BoolTmpCache;
   static BoolTmpCache boolToTmpCache;     // Map boolVal -> TmpInstruction*
@@ -372,7 +378,7 @@ GetTmpForCC(Value* boolVal, const Function *F, const Type* ccType)
   // directly written to map using the ref returned by operator[].
   TmpInstruction*& tmpI = boolToTmpCache[boolVal];
   if (tmpI == NULL)
-    tmpI = new TmpInstruction(ccType, boolVal);
+    tmpI = new TmpInstruction(mcfi, ccType, boolVal);
   
   return tmpI;
 }
@@ -427,13 +433,9 @@ ChooseMovFpccInstruction(const InstructionNode* instrNode)
 // (The latter two cases do not seem to arise because SetNE needs nothing.)
 // 
 static MachineOpCode
-ChooseMovpccAfterSub(const InstructionNode* instrNode,
-                     bool& mustClearReg,
-                     int& valueToMove)
+ChooseMovpccAfterSub(const InstructionNode* instrNode)
 {
   MachineOpCode opCode = V9::INVALID_OPCODE;
-  mustClearReg = true;
-  valueToMove = 1;
   
   switch(instrNode->getInstruction()->getOpcode())
   {
@@ -442,8 +444,8 @@ ChooseMovpccAfterSub(const InstructionNode* instrNode,
   case Instruction::SetGE: opCode = V9::MOVGE; break;
   case Instruction::SetLT: opCode = V9::MOVL;  break;
   case Instruction::SetGT: opCode = V9::MOVG;  break;
-  case Instruction::SetNE: assert(0 && "No move required!"); break;
-  default:		     assert(0 && "Unrecognized VM instr!"); break; 
+  case Instruction::SetNE: opCode = V9::MOVNE; break;
+  default: assert(0 && "Unrecognized VM instr!"); break; 
   }
   
   return opCode;
@@ -452,45 +454,23 @@ ChooseMovpccAfterSub(const InstructionNode* instrNode,
 static inline MachineOpCode
 ChooseConvertToFloatInstr(OpLabel vopCode, const Type* opType)
 {
+  assert((vopCode == ToFloatTy || vopCode == ToDoubleTy) &&
+         "Unrecognized convert-to-float opcode!");
+
   MachineOpCode opCode = V9::INVALID_OPCODE;
   
-  switch(vopCode)
-  {
-  case ToFloatTy: 
-    if (opType == Type::SByteTy || opType == Type::ShortTy ||
-        opType == Type::IntTy)
-      opCode = V9::FITOS;
-    else if (opType == Type::LongTy)
-      opCode = V9::FXTOS;
-    else if (opType == Type::DoubleTy)
-      opCode = V9::FDTOS;
-    else if (opType == Type::FloatTy)
-      ;
-    else
-      assert(0 && "Cannot convert this type to FLOAT on SPARC");
-    break;
-      
-  case ToDoubleTy: 
-    // This is usually used in conjunction with CreateCodeToCopyIntToFloat().
-    // Both functions should treat the integer as a 32-bit value for types
-    // of 4 bytes or less, and as a 64-bit value otherwise.
-    if (opType == Type::SByteTy || opType == Type::UByteTy ||
-        opType == Type::ShortTy || opType == Type::UShortTy ||
-        opType == Type::IntTy   || opType == Type::UIntTy)
-      opCode = V9::FITOD;
-    else if (opType == Type::LongTy || opType == Type::ULongTy)
-      opCode = V9::FXTOD;
-    else if (opType == Type::FloatTy)
-      opCode = V9::FSTOD;
-    else if (opType == Type::DoubleTy)
-      ;
-    else
-      assert(0 && "Cannot convert this type to DOUBLE on SPARC");
-    break;
-      
-  default:
-    break;
-  }
+  if (opType == Type::SByteTy || opType == Type::UByteTy ||
+      opType == Type::ShortTy || opType == Type::UShortTy ||
+      opType == Type::IntTy   || opType == Type::UIntTy)
+      opCode = (vopCode == ToFloatTy? V9::FITOS : V9::FITOD);
+  else if (opType == Type::LongTy || opType == Type::ULongTy)
+      opCode = (vopCode == ToFloatTy? V9::FXTOS : V9::FXTOD);
+  else if (opType == Type::FloatTy)
+      opCode = (vopCode == ToFloatTy? V9::INVALID_OPCODE : V9::FSTOD);
+  else if (opType == Type::DoubleTy)
+      opCode = (vopCode == ToFloatTy? V9::FDTOS : V9::INVALID_OPCODE);
+  else
+    assert(0 && "Cannot convert this type to DOUBLE on SPARC");
   
   return opCode;
 }
@@ -503,6 +483,12 @@ ChooseConvertFPToIntInstr(Type::PrimitiveID tid, const Type* opType)
   assert((opType == Type::FloatTy || opType == Type::DoubleTy)
          && "This function should only be called for FLOAT or DOUBLE");
 
+  // SPARC does not have a float-to-uint conversion, only a float-to-int.
+  // For converting an FP value to uint32_t, we first need to convert to
+  // uint64_t and then to uint32_t, or we may overflow the signed int
+  // representation even for legal uint32_t values.  This expansion is
+  // done by the Preselection pass.
+  // 
   if (tid == Type::UIntTyID) {
     assert(tid != Type::UIntTyID && "FP-to-uint conversions must be expanded"
            " into FP->long->uint for SPARC v9:  SO RUN PRESELECTION PASS!");
@@ -558,8 +544,7 @@ CreateCodeToConvertFloatToInt(const TargetMachine& target,
   // 
   size_t destSize = target.getTargetData().getTypeSize(destI->getType());
   const Type* destTypeToUse = (destSize > 4)? Type::DoubleTy : Type::FloatTy;
-  TmpInstruction* destForCast = new TmpInstruction(destTypeToUse, opVal);
-  mcfi.addTemp(destForCast);
+  TmpInstruction* destForCast = new TmpInstruction(mcfi, destTypeToUse, opVal);
 
   // Create the fp-to-int conversion code
   MachineInstr* M =CreateConvertFPToIntInstr(destI->getType()->getPrimitiveID(),
@@ -747,10 +732,10 @@ CreateShiftInstructions(const TargetMachine& target,
   // 
   Value* shiftDest = destVal;
   unsigned opSize = target.getTargetData().getTypeSize(argVal1->getType());
+
   if ((shiftOpCode == V9::SLLr6 || shiftOpCode == V9::SLLXr6) && opSize < 8) {
     // put SLL result into a temporary
-    shiftDest = new TmpInstruction(argVal1, optArgVal2, "sllTmp");
-    mcfi.addTemp(shiftDest);
+    shiftDest = new TmpInstruction(mcfi, argVal1, optArgVal2, "sllTmp");
   }
   
   MachineInstr* M = (optArgVal2 != NULL)
@@ -975,10 +960,8 @@ CreateDivConstInstruction(TargetMachine &target,
           TmpInstruction *srlTmp, *addTmp;
           MachineCodeForInstruction& mcfi
             = MachineCodeForInstruction::get(destVal);
-          srlTmp = new TmpInstruction(resultType, LHS, 0, "getSign");
-          addTmp = new TmpInstruction(resultType, LHS, srlTmp, "incIfNeg");
-          mcfi.addTemp(srlTmp);
-          mcfi.addTemp(addTmp);
+          srlTmp = new TmpInstruction(mcfi, resultType, LHS, 0, "getSign");
+          addTmp = new TmpInstruction(mcfi, resultType, LHS, srlTmp,"incIfNeg");
 
           // Create the SRL or SRLX instruction to get the sign bit
           mvec.push_back(BuildMI((resultType==Type::LongTy) ?
@@ -1055,12 +1038,9 @@ CreateCodeForVariableSizeAlloca(const TargetMachine& target,
 
     // Create temporary values to hold the result of MUL, SLL, SRL
     // THIS CASE IS INCOMPLETE AND WILL BE FIXED SHORTLY.
-    TmpInstruction* tmpProd = new TmpInstruction(numElementsVal, tsizeVal);
-    TmpInstruction* tmpSLL  = new TmpInstruction(numElementsVal, tmpProd);
-    TmpInstruction* tmpSRL  = new TmpInstruction(numElementsVal, tmpSLL);
-    mcfi.addTemp(tmpProd);
-    mcfi.addTemp(tmpSLL);
-    mcfi.addTemp(tmpSRL);
+    TmpInstruction* tmpProd = new TmpInstruction(mcfi,numElementsVal, tsizeVal);
+    TmpInstruction* tmpSLL  = new TmpInstruction(mcfi,numElementsVal, tmpProd);
+    TmpInstruction* tmpSRL  = new TmpInstruction(mcfi,numElementsVal, tmpSLL);
 
     // Instruction 1: mul numElements, typeSize -> tmpProd
     // This will optimize the MUL as far as possible.
@@ -1195,8 +1175,9 @@ SetOperandsForMemInstr(unsigned Opcode,
       Value* idxVal = idxVec[firstIdxIsZero];
 
       std::vector<MachineInstr*> mulVec;
-      Instruction* addr = new TmpInstruction(Type::ULongTy, memInst);
-      MachineCodeForInstruction::get(memInst).addTemp(addr);
+      Instruction* addr =
+        new TmpInstruction(MachineCodeForInstruction::get(memInst),
+                           Type::ULongTy, memInst);
 
       // Get the array type indexed by idxVal, and compute its element size.
       // The call to getTypeSize() will fail if size is not constant.
@@ -1463,24 +1444,62 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
       case 2:	// stmt:   RetValue(reg)
       {         // NOTE: Prepass of register allocation is responsible
                 //	 for moving return value to appropriate register.
+                // Copy the return value to the required return register.
+                // Mark the return Value as an implicit ref of the RET instr.
                 // Mark the return-address register as a hidden virtual reg.
-                // Mark the return value   register as an implicit ref of
-                // the machine instruction.
          	// Finally put a NOP in the delay slot.
-        ReturnInst *returnInstr =
-          cast<ReturnInst>(subtreeRoot->getInstruction());
-        assert(returnInstr->getOpcode() == Instruction::Ret);
-        
-        Instruction* returnReg = new TmpInstruction(returnInstr);
-        MachineCodeForInstruction::get(returnInstr).addTemp(returnReg);
-        
-        M = BuildMI(V9::JMPLRETi, 3).addReg(returnReg).addSImm(8)
+        ReturnInst *returnInstr=cast<ReturnInst>(subtreeRoot->getInstruction());
+        Value* retVal = returnInstr->getReturnValue();
+        MachineCodeForInstruction& mcfi =
+          MachineCodeForInstruction::get(returnInstr);
+
+        // Create a hidden virtual reg to represent the return address register
+        // used by the machine instruction but not represented in LLVM.
+        // 
+        Instruction* returnAddrTmp = new TmpInstruction(mcfi, returnInstr);
+
+        MachineInstr* retMI = 
+          BuildMI(V9::JMPLRETi, 3).addReg(returnAddrTmp).addSImm(8)
           .addMReg(target.getRegInfo().getZeroRegNum(), MOTy::Def);
+      
+        // Insert a copy to copy the return value to the appropriate register
+        // -- For FP values, create a FMOVS or FMOVD instruction
+        // -- For non-FP values, create an add-with-0 instruction
+        // 
+        if (retVal != NULL) {
+          const UltraSparcRegInfo& regInfo =
+            (UltraSparcRegInfo&) target.getRegInfo();
+          const Type* retType = retVal->getType();
+          unsigned regClassID = regInfo.getRegClassIDOfType(retType);
+          unsigned retRegNum = (retType->isFloatingPoint()
+                                ? (unsigned) SparcFloatRegClass::f0
+                                : (unsigned) SparcIntRegClass::i0);
+          retRegNum = regInfo.getUnifiedRegNum(regClassID, retRegNum);
+
+          // Create a virtual register to represent it and mark
+          // this vreg as being an implicit operand of the ret MI
+          TmpInstruction* retVReg = 
+            new TmpInstruction(mcfi, retVal, NULL, "argReg");
+            
+          retMI->addImplicitRef(retVReg);
+            
+          if (retType->isFloatingPoint())
+            M = (BuildMI(retType==Type::FloatTy? V9::FMOVS : V9::FMOVD, 2)
+                 .addReg(retVal).addReg(retVReg, MOTy::Def));
+          else
+            M = (BuildMI(ChooseAddInstructionByType(retType), 3)
+                 .addReg(retVal).addSImm((int64_t) 0)
+                 .addReg(retVReg, MOTy::Def));
+
+          // Mark the operand with the register it should be assigned
+          M->SetRegForOperand(M->getNumOperands()-1, retRegNum);
+          retMI->SetRegForImplicitRef(retMI->getNumImplicitRefs()-1, retRegNum);
+
+          mvec.push_back(M);
+        }
         
-        if (returnInstr->getReturnValue() != NULL)
-          M->addImplicitRef(returnInstr->getReturnValue());
-        
-        mvec.push_back(M);
+        // Now insert the RET instruction and a NOP for the delay slot
+        mvec.push_back(retMI);
         mvec.push_back(BuildMI(V9::NOP, 0));
         
         break;
@@ -1560,7 +1579,8 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         unsigned Opcode = ChooseBccInstruction(subtreeRoot, isFPBranch);
         Value* ccValue = GetTmpForCC(subtreeRoot->leftChild()->getValue(),
                                      brInst->getParent()->getParent(),
-                                     isFPBranch? Type::FloatTy : Type::IntTy);
+                                     isFPBranch? Type::FloatTy : Type::IntTy,
+                                     MachineCodeForInstruction::get(brInst));
         M = BuildMI(Opcode, 2).addCCReg(ccValue)
                               .addPCDisp(brInst->getSuccessor(0));
         mvec.push_back(M);
@@ -1593,8 +1613,8 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
       }
         
       case   8:	// stmt:   BrCond(boolreg)
-      { // boolreg   => boolean is stored in an existing register.
-        // Just use the branch-on-integer-register instruction!
+      { // boolreg   => boolean is recorded in an integer register.
+        //              Use branch-on-integer-register instruction.
         // 
         BranchInst *BI = cast<BranchInst>(subtreeRoot->getInstruction());
         M = BuildMI(V9::BRNZ, 2).addReg(subtreeRoot->leftChild()->getValue())
@@ -1769,10 +1789,9 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
                 target.getTargetData().getTypeSize(leftVal->getType());
               Type* tmpTypeToUse =
                 (srcSize <= 4)? Type::FloatTy : Type::DoubleTy;
-              srcForCast = new TmpInstruction(tmpTypeToUse, dest);
               MachineCodeForInstruction &destMCFI = 
                 MachineCodeForInstruction::get(dest);
-              destMCFI.addTemp(srcForCast);
+              srcForCast = new TmpInstruction(destMCFI, tmpTypeToUse, dest);
 
               target.getInstrInfo().CreateCodeToCopyIntToFloat(target,
                          dest->getParent()->getParent(),
@@ -1878,14 +1897,14 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
       {
         maskUnsignedResult = true;
         Instruction* remInstr = subtreeRoot->getInstruction();
-        
-        TmpInstruction* quot = new TmpInstruction(
+
+        MachineCodeForInstruction& mcfi=MachineCodeForInstruction::get(remInstr);
+        TmpInstruction* quot = new TmpInstruction(mcfi,
                                         subtreeRoot->leftChild()->getValue(),
                                         subtreeRoot->rightChild()->getValue());
-        TmpInstruction* prod = new TmpInstruction(
+        TmpInstruction* prod = new TmpInstruction(mcfi,
                                         quot,
                                         subtreeRoot->rightChild()->getValue());
-        MachineCodeForInstruction::get(remInstr).addTemp(quot).addTemp(prod); 
         
         M = BuildMI(ChooseDivInstruction(target, subtreeRoot), 3)
                              .addReg(subtreeRoot->leftChild()->getValue())
@@ -1982,34 +2001,21 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         // 
       case 42:	// bool:   SetCC(reg, reg):
       {
-        // This generates a SUBCC instruction, putting the difference in
-        // a result register, and setting a condition code.
+        // This generates a SUBCC instruction, putting the difference in a
+        // result reg. if needed, and/or setting a condition code if needed.
         // 
-        // If the boolean result of the SetCC is used by anything other
-        // than a branch instruction, or if it is used outside the current
-        // basic block, the boolean must be
-        // computed and stored in the result register.  Otherwise, discard
-        // the difference (by using %g0) and keep only the condition code.
-        // 
-        // To compute the boolean result in a register we use a conditional
-        // move, unless the result of the SUBCC instruction can be used as
-        // the bool!  This assumes that zero is FALSE and any non-zero
-        // integer is TRUE.
-        // 
-        InstructionNode* parentNode = (InstructionNode*) subtreeRoot->parent();
         Instruction* setCCInstr = subtreeRoot->getInstruction();
+        Value* leftVal = subtreeRoot->leftChild()->getValue();
+        bool isFPCompare = leftVal->getType()->isFloatingPoint();
         
-        bool keepBoolVal = parentNode == NULL ||
-                           ! AllUsesAreBranches(setCCInstr);
-        bool subValIsBoolVal = setCCInstr->getOpcode() == Instruction::SetNE;
-        bool keepSubVal = keepBoolVal && subValIsBoolVal;
-        bool computeBoolVal = keepBoolVal && ! subValIsBoolVal;
+        // If the boolean result of the SetCC is used outside the current basic
+        // block (so it must be computed as a boolreg) or is used by anything
+        // other than a branch, the boolean must be computed and stored
+        // in a result register.  We will use a conditional move to do this.
+        // 
+        bool computeBoolVal = (subtreeRoot->parent() == NULL ||
+                               ! AllUsesAreBranches(setCCInstr));
         
-        bool mustClearReg;
-        int valueToMove;
-        MachineOpCode movOpCode = 0;
-        
-        // Mark the 4th operand as being a CC register, and as a def
         // A TmpInstruction is created to represent the CC "result".
         // Unlike other instances of TmpInstruction, this one is used
         // by machine code of multiple LLVM instructions, viz.,
@@ -2019,74 +2025,47 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         // needs to be a floating point condition code, not an integer
         // condition code.  Think of this as casting the bool result to
         // a FP condition code register.
+        // Later, we mark the 4th operand as being a CC register, and as a def.
         // 
-        Value* leftVal = subtreeRoot->leftChild()->getValue();
-        bool isFPCompare = leftVal->getType()->isFloatingPoint();
-        
         TmpInstruction* tmpForCC = GetTmpForCC(setCCInstr,
-                                     setCCInstr->getParent()->getParent(),
-                                     isFPCompare ? Type::FloatTy : Type::IntTy);
-        MachineCodeForInstruction::get(setCCInstr).addTemp(tmpForCC);
-        
+                                    setCCInstr->getParent()->getParent(),
+                                    isFPCompare ? Type::FloatTy : Type::IntTy,
+                                    MachineCodeForInstruction::get(setCCInstr));
         if (! isFPCompare) {
-          // Integer condition: dest. should be %g0 or an integer register.
-          // If result must be saved but condition is not SetEQ then we need
-          // a separate instruction to compute the bool result, so discard
-          // result of SUBcc instruction anyway.
-          // 
-          if (keepSubVal) {
-            M = BuildMI(V9::SUBccr, 4)
-              .addReg(subtreeRoot->leftChild()->getValue())
-              .addReg(subtreeRoot->rightChild()->getValue())
-              .addRegDef(subtreeRoot->getValue())
-              .addCCReg(tmpForCC, MOTy::Def);
-          } else {
-            M = BuildMI(V9::SUBccr, 4)
-              .addReg(subtreeRoot->leftChild()->getValue())
-              .addReg(subtreeRoot->rightChild()->getValue())
-              .addMReg(target.getRegInfo().getZeroRegNum(), MOTy::Def)
-              .addCCReg(tmpForCC, MOTy::Def);
-          }
-          mvec.push_back(M);
-            
-          if (computeBoolVal) {
-            // recompute bool using the integer condition codes
-            movOpCode =
-              ChooseMovpccAfterSub(subtreeRoot,mustClearReg,valueToMove);
-          }
+          // Integer condition: set CC and discard result.
+          M = BuildMI(V9::SUBccr, 4)
+            .addReg(subtreeRoot->leftChild()->getValue())
+            .addReg(subtreeRoot->rightChild()->getValue())
+            .addMReg(target.getRegInfo().getZeroRegNum(), MOTy::Def)
+            .addCCReg(tmpForCC, MOTy::Def);
         } else {
           // FP condition: dest of FCMP should be some FCCn register
           M = BuildMI(ChooseFcmpInstruction(subtreeRoot), 3)
             .addCCReg(tmpForCC, MOTy::Def)
             .addReg(subtreeRoot->leftChild()->getValue())
-            .addRegDef(subtreeRoot->rightChild()->getValue());
-          mvec.push_back(M);
-            
-          if (computeBoolVal) {
-            // recompute bool using the FP condition codes
-            mustClearReg = true;
-            valueToMove = 1;
-            movOpCode = ChooseMovFpccInstruction(subtreeRoot);
-          }
+            .addReg(subtreeRoot->rightChild()->getValue());
         }
+        mvec.push_back(M);
         
         if (computeBoolVal) {
-          if (mustClearReg) {
-            // Unconditionally set register to 0
-            M = BuildMI(V9::SETHI, 2).addZImm(0).addRegDef(setCCInstr);
-            mvec.push_back(M);
-          }
-            
-          // Now conditionally move `valueToMove' (0 or 1) into the register
+          MachineOpCode movOpCode = (isFPCompare
+                                     ? ChooseMovFpccInstruction(subtreeRoot)
+                                     : ChooseMovpccAfterSub(subtreeRoot));
+
+          // Unconditionally set register to 0
+          M = BuildMI(V9::SETHI, 2).addZImm(0).addRegDef(setCCInstr);
+          mvec.push_back(M);
+          
+          // Now conditionally move 1 into the register.
           // Mark the register as a use (as well as a def) because the old
-          // value should be retained if the condition is false.
-          M = BuildMI(movOpCode, 3).addCCReg(tmpForCC).addZImm(valueToMove)
-            .addReg(setCCInstr, MOTy::UseAndDef);
+          // value will be retained if the condition is false.
+          M = (BuildMI(movOpCode, 3).addCCReg(tmpForCC).addZImm(1)
+               .addReg(setCCInstr, MOTy::UseAndDef));
           mvec.push_back(M);
         }
         break;
-      }
-
+      }    
+      
       case 51:	// reg:   Load(reg)
       case 52:	// reg:   Load(ptrreg)
         SetOperandsForMemInstr(ChooseLoadInstruction(
@@ -2125,18 +2104,17 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         Value* numElementsVal = NULL;
         bool isArray = instr->isArrayAllocation();
         
-        if (!isArray ||
-            isa<Constant>(numElementsVal = instr->getArraySize()))
-        { 
+        if (!isArray || isa<Constant>(numElementsVal = instr->getArraySize())) {
           // total size is constant: generate code for fixed-size alloca
           unsigned numElements = isArray? 
             cast<ConstantUInt>(numElementsVal)->getValue() : 1;
           CreateCodeForFixedSizeAlloca(target, instr, tsize,
                                        numElements, mvec);
-        }
-        else // total size is not constant.
+        } else {
+          // total size is not constant.
           CreateCodeForVariableSizeAlloca(target, instr, tsize,
                                           numElementsVal, mvec);
+        }
         break;
       }
 
@@ -2167,26 +2145,34 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         // This can also handle any intrinsics that are just function calls.
         // 
         if (! specialIntrinsic) {
+          MachineFunction& MF =
+            MachineFunction::get(callInstr->getParent()->getParent());
+          MachineCodeForInstruction& mcfi =
+            MachineCodeForInstruction::get(callInstr); 
+          const UltraSparcRegInfo& regInfo =
+            (UltraSparcRegInfo&) target.getRegInfo();
+          const TargetFrameInfo& frameInfo = target.getFrameInfo();
+
           // Create hidden virtual register for return address with type void*
           TmpInstruction* retAddrReg =
-            new TmpInstruction(PointerType::get(Type::VoidTy), callInstr);
-          MachineCodeForInstruction::get(callInstr).addTemp(retAddrReg);
+            new TmpInstruction(mcfi, PointerType::get(Type::VoidTy), callInstr);
 
           // Generate the machine instruction and its operands.
           // Use CALL for direct function calls; this optimistically assumes
           // the PC-relative address fits in the CALL address field (22 bits).
           // Use JMPL for indirect calls.
+          // This will be added to mvec later, after operand copies.
           // 
+          MachineInstr* callMI;
           if (calledFunc)             // direct function call
-            M = BuildMI(V9::CALL, 1).addPCDisp(callee);
+            callMI = BuildMI(V9::CALL, 1).addPCDisp(callee);
           else                        // indirect function call
-            M = BuildMI(V9::JMPLCALLi, 3).addReg(callee).addSImm((int64_t)0)
-              .addRegDef(retAddrReg);
-          mvec.push_back(M);
+            callMI = (BuildMI(V9::JMPLCALLi,3).addReg(callee)
+                      .addSImm((int64_t)0).addRegDef(retAddrReg));
 
           const FunctionType* funcType =
             cast<FunctionType>(cast<PointerType>(callee->getType())
-                                 ->getElementType());
+                               ->getElementType());
           bool isVarArgs = funcType->isVarArg();
           bool noPrototype = isVarArgs && funcType->getNumParams() == 0;
         
@@ -2194,64 +2180,213 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
           // to the register allocator.  This descriptor will be "owned"
           // and freed automatically when the MachineCodeForInstruction
           // object for the callInstr goes away.
-          CallArgsDescriptor* argDesc = new CallArgsDescriptor(callInstr,
-                                             retAddrReg, isVarArgs,noPrototype);
-            
+          CallArgsDescriptor* argDesc =
+            new CallArgsDescriptor(callInstr, retAddrReg,isVarArgs,noPrototype);
           assert(callInstr->getOperand(0) == callee
                  && "This is assumed in the loop below!");
-            
+          
+          // Insert copy instructions to get all the arguments into
+          // all the places that they need to be.
+          // 
           for (unsigned i=1, N=callInstr->getNumOperands(); i < N; ++i) {
+            int argNo = i-1;
             Value* argVal = callInstr->getOperand(i);
-            Instruction* intArgReg = NULL;
-            
+            const Type* argType = argVal->getType();
+            unsigned regType = regInfo.getRegType(argType);
+            unsigned argSize = target.getTargetData().getTypeSize(argType);
+            int regNumForArg = TargetRegInfo::getInvalidRegNum();
+            unsigned regClassIDOfArgReg;
+            CallArgInfo& argInfo = argDesc->getArgInfo(argNo);
+
             // Check for FP arguments to varargs functions.
             // Any such argument in the first $K$ args must be passed in an
-            // integer register, where K = #integer argument registers.
-            if (isVarArgs && argVal->getType()->isFloatingPoint()) {
+            // integer register.  If there is no prototype, it must also
+            // be passed as an FP register.
+            // K = #integer argument registers.
+            bool isFPArg = argVal->getType()->isFloatingPoint();
+            if (isVarArgs && isFPArg) {
               // If it is a function with no prototype, pass value
               // as an FP value as well as a varargs value
               if (noPrototype)
-                argDesc->getArgInfo(i-1).setUseFPArgReg();
+                argInfo.setUseFPArgReg();
                 
-              // If this arg. is in the first $K$ regs, add a copy
-              // float-to-int instruction to pass the value as an integer.
-              if (i <= target.getRegInfo().getNumOfIntArgRegs()) {
-                MachineCodeForInstruction &destMCFI = 
-                  MachineCodeForInstruction::get(callInstr);   
-                intArgReg = new TmpInstruction(Type::IntTy, argVal);
-                destMCFI.addTemp(intArgReg);
+              // If this arg. is in the first $K$ regs, add copy-
+              // float-to-int instructions to pass the value as an int.
+              // To check if it is in the first $K$, get the register
+              // number for the arg #i.
+              int copyRegNum = regInfo.regNumForIntArg(false, false,
+                                                       argNo, regClassIDOfArgReg);
+              if (copyRegNum != regInfo.getInvalidRegNum()) {
+                // Create a virtual register to represent copyReg. Mark
+                // this vreg as being an implicit operand of the call MI
+                const Type* loadTy = (argType == Type::FloatTy
+                                      ? Type::IntTy : Type::LongTy);
+                TmpInstruction* argVReg= new TmpInstruction(mcfi,loadTy,
+                                                            argVal, NULL, "argRegCopy");
+                callMI->addImplicitRef(argVReg);
+                        
+                // Get a temp stack location to use to copy
+                // float-to-int via the stack.
+                // 
+                // FIXME: For now, we allocate permanent space because
+                // the stack frame manager does not allow locals to be
+                // allocated (e.g., for alloca) after a temp is
+                // allocated!
+                // 
+                // int tmpOffset = MF.getInfo()->pushTempValue(argSize);
+                int tmpOffset = MF.getInfo()->allocateLocalVar(argVReg);
                     
-                std::vector<MachineInstr*> copyMvec;
-                target.getInstrInfo().CreateCodeToCopyFloatToInt(target,
-                                         callInstr->getParent()->getParent(),
-                                         argVal, (TmpInstruction*) intArgReg,
-                                         copyMvec, destMCFI);
-                mvec.insert(mvec.begin(),copyMvec.begin(),copyMvec.end());
-                    
-                argDesc->getArgInfo(i-1).setUseIntArgReg();
-                argDesc->getArgInfo(i-1).setArgCopy(intArgReg);
-              } else
+                // Generate the store from FP reg to stack
+                M = BuildMI(ChooseStoreInstruction(argType), 3)
+                  .addReg(argVal).addMReg(regInfo.getFramePointer())
+                  .addSImm(tmpOffset);
+                mvec.push_back(M);
+                        
+                // Generate the load from stack to int arg reg
+                M = BuildMI(ChooseLoadInstruction(loadTy), 3)
+                  .addMReg(regInfo.getFramePointer()).addSImm(tmpOffset)
+                  .addReg(argVReg, MOTy::Def);
+
+                // Mark operand with register it should be assigned
+                // both for copy and for the callMI
+                M->SetRegForOperand(M->getNumOperands()-1, copyRegNum);
+                callMI->SetRegForImplicitRef(
+                                             callMI->getNumImplicitRefs()-1, copyRegNum);
+
+                mvec.push_back(M);
+
+                // Add info about the argument to the CallArgsDescriptor
+                argInfo.setUseIntArgReg();
+                argInfo.setArgCopy(copyRegNum);
+              } else {
                 // Cannot fit in first $K$ regs so pass arg on stack
-                argDesc->getArgInfo(i-1).setUseStackSlot();
+                argInfo.setUseStackSlot();
+              }
+            } else if (isFPArg) {
+              // Get the outgoing arg reg to see if there is one.
+              regNumForArg = regInfo.regNumForFPArg(regType, false, false,
+                                                    argNo, regClassIDOfArgReg);
+              if (regNumForArg == regInfo.getInvalidRegNum())
+                argInfo.setUseStackSlot();
+              else {
+                argInfo.setUseFPArgReg();
+                regNumForArg =regInfo.getUnifiedRegNum(regClassIDOfArgReg,
+                                                       regNumForArg);
+              }
+            } else {
+              // Get the outgoing arg reg to see if there is one.
+              regNumForArg = regInfo.regNumForIntArg(false,false,
+                                                     argNo, regClassIDOfArgReg);
+              if (regNumForArg == regInfo.getInvalidRegNum())
+                argInfo.setUseStackSlot();
+              else {
+                argInfo.setUseIntArgReg();
+                regNumForArg =regInfo.getUnifiedRegNum(regClassIDOfArgReg,
+                                                       regNumForArg);
+              }
+            }                
+
+            // 
+            // Now insert copy instructions to stack slot or arg. register
+            // 
+            if (argInfo.usesStackSlot()) {
+              // Get the stack offset for this argument slot.
+              // FP args on stack are right justified so adjust offset!
+              // int arguments are also right justified but they are
+              // always loaded as a full double-word so the offset does
+              // not need to be adjusted.
+              int argOffset = frameInfo.getOutgoingArgOffset(MF, argNo);
+              if (argType->isFloatingPoint()) {
+                unsigned slotSize = frameInfo.getSizeOfEachArgOnStack();
+                assert(argSize <= slotSize && "Insufficient slot size!");
+                argOffset += slotSize - argSize;
+              }
+
+              // Now generate instruction to copy argument to stack
+              MachineOpCode storeOpCode =
+                (argType->isFloatingPoint()
+                 ? ((argSize == 4)? V9::STFi : V9::STDFi) : V9::STXi);
+
+              M = BuildMI(storeOpCode, 3).addReg(argVal)
+                .addMReg(regInfo.getStackPointer()).addSImm(argOffset);
+              mvec.push_back(M);
+            } else {
+              // Create a virtual register to represent the arg reg. Mark
+              // this vreg as being an implicit operand of the call MI.
+              TmpInstruction* argVReg = 
+                new TmpInstruction(mcfi, argVal, NULL, "argReg");
+
+              callMI->addImplicitRef(argVReg);
+                    
+              // Generate the reg-to-reg copy into the outgoing arg reg.
+              // -- For FP values, create a FMOVS or FMOVD instruction
+              // -- For non-FP values, create an add-with-0 instruction
+              if (argType->isFloatingPoint())
+                M=(BuildMI(argType==Type::FloatTy? V9::FMOVS :V9::FMOVD,2)
+                   .addReg(argVal).addReg(argVReg, MOTy::Def));
+              else
+                M = (BuildMI(ChooseAddInstructionByType(argType), 3)
+                     .addReg(argVal).addSImm((int64_t) 0)
+                     .addReg(argVReg, MOTy::Def));
+                    
+              // Mark the operand with the register it should be assigned
+              M->SetRegForOperand(M->getNumOperands()-1, regNumForArg);
+              callMI->SetRegForImplicitRef(callMI->getNumImplicitRefs()-1,
+                                           regNumForArg);
+
+              mvec.push_back(M);
             }
-            
-            if (intArgReg)
-              mvec.back()->addImplicitRef(intArgReg);
-            
-            mvec.back()->addImplicitRef(argVal);
           }
-        
+
+          // add call instruction and delay slot before copying return value
+          mvec.push_back(callMI);
+          mvec.push_back(BuildMI(V9::NOP, 0));
+
           // Add the return value as an implicit ref.  The call operands
-          // were added above.
-          if (callInstr->getType() != Type::VoidTy)
-            mvec.back()->addImplicitRef(callInstr, /*isDef*/ true);
-        
+          // were added above.  Also, add code to copy out the return value.
+          // This is always register-to-register for int or FP return values.
+          // 
+          if (callInstr->getType() != Type::VoidTy) { 
+            // Get the return value reg.
+            const Type* retType = callInstr->getType();
+
+            int regNum = (retType->isFloatingPoint()
+                          ? (unsigned) SparcFloatRegClass::f0 
+                          : (unsigned) SparcIntRegClass::o0);
+            unsigned regClassID = regInfo.getRegClassIDOfType(retType);
+            regNum = regInfo.getUnifiedRegNum(regClassID, regNum);
+
+            // Create a virtual register to represent it and mark
+            // this vreg as being an implicit operand of the call MI
+            TmpInstruction* retVReg = 
+              new TmpInstruction(mcfi, callInstr, NULL, "argReg");
+
+            callMI->addImplicitRef(retVReg, /*isDef*/ true);
+
+            // Generate the reg-to-reg copy from the return value reg.
+            // -- For FP values, create a FMOVS or FMOVD instruction
+            // -- For non-FP values, create an add-with-0 instruction
+            if (retType->isFloatingPoint())
+              M = (BuildMI(retType==Type::FloatTy? V9::FMOVS : V9::FMOVD, 2)
+                   .addReg(retVReg).addReg(callInstr, MOTy::Def));
+            else
+              M = (BuildMI(ChooseAddInstructionByType(retType), 3)
+                   .addReg(retVReg).addSImm((int64_t) 0)
+                   .addReg(callInstr, MOTy::Def));
+
+            // Mark the operand with the register it should be assigned
+            // Also mark the implicit ref of the call defining this operand
+            M->SetRegForOperand(0, regNum);
+            callMI->SetRegForImplicitRef(callMI->getNumImplicitRefs()-1,regNum);
+
+            mvec.push_back(M);
+          }
+
           // For the CALL instruction, the ret. addr. reg. is also implicit
           if (isa<Function>(callee))
-            mvec.back()->addImplicitRef(retAddrReg, /*isDef*/ true);
-        
-          // delay slot
-          mvec.push_back(BuildMI(V9::NOP, 0));
+            callMI->addImplicitRef(retAddrReg, /*isDef*/ true);
+
+          MF.getInfo()->popAllTempValues();  // free temps used for this inst
         }
 
         break;
@@ -2345,9 +2480,9 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         // intermediate result before masking.  Since those instructions
         // have already been generated, go back and substitute tmpI
         // for dest in the result position of each one of them.
-        TmpInstruction *tmpI = new TmpInstruction(dest->getType(), dest,
-                                                  NULL, "maskHi");
-        MachineCodeForInstruction::get(dest).addTemp(tmpI);
+        TmpInstruction *tmpI =
+          new TmpInstruction(MachineCodeForInstruction::get(dest),
+                             dest->getType(), dest, NULL, "maskHi");
 
         for (unsigned i=0, N=mvec.size(); i < N; ++i)
           mvec[i]->substituteValue(dest, tmpI);
diff --git a/lib/Target/SparcV9/SparcV9Internals.h b/lib/Target/SparcV9/SparcV9Internals.h
index 3e49efd5276..b4ce61710c3 100644
--- a/lib/Target/SparcV9/SparcV9Internals.h
+++ b/lib/Target/SparcV9/SparcV9Internals.h
@@ -272,12 +272,6 @@ class UltraSparcRegInfo : public TargetRegInfo {
   //
   unsigned const NumOfFloatArgRegs;
 
-  // An out of bound register number that can be used to initialize register
-  // numbers. Useful for error detection.
-  //
-  int const InvalidRegNum;
-
-
   // ========================  Private Methods =============================
 
   // The following methods are used to color special live ranges (e.g.
@@ -295,13 +289,9 @@ class UltraSparcRegInfo : public TargetRegInfo {
                              int  UniArgReg, unsigned int argNo,
                              std::vector<MachineInstr *>& AddedInstrnsBefore)
     const;
-  
-  // Get the register type for a register identified different ways.
-  // The first function is a helper used by the all the hoter functions.
+
+  // Helper used by all the getRegType() functions.
   int getRegTypeForClassAndType(unsigned regClassID, const Type* type) const;
-  int getRegType(const Type* type) const;
-  int getRegType(const LiveRange *LR) const;
-  int getRegType(int unifiedRegNum) const;
 
   // Used to generate a copy instruction based on the register class of
   // value.
@@ -322,17 +312,6 @@ class UltraSparcRegInfo : public TargetRegInfo {
                          std::vector<MachineInstr *> &OrdVec,
                          PhyRegAlloc &PRA) const;
 
-
-  // Compute which register can be used for an argument, if any
-  // 
-  int regNumForIntArg(bool inCallee, bool isVarArgsCall,
-                      unsigned argNo, unsigned intArgNo, unsigned fpArgNo,
-                      unsigned& regClassId) const;
-
-  int regNumForFPArg(unsigned RegType, bool inCallee, bool isVarArgsCall,
-                     unsigned argNo, unsigned intArgNo, unsigned fpArgNo,
-                     unsigned& regClassId) const;
-  
 public:
   // Type of registers available in Sparc. There can be several reg types
   // in the same class. For instace, the float reg class has Single/Double
@@ -380,6 +359,14 @@ public:
   unsigned const getNumOfIntArgRegs() const   { return NumOfIntArgRegs; }
   unsigned const getNumOfFloatArgRegs() const { return NumOfFloatArgRegs; }
   
+  // Compute which register can be used for an argument, if any
+  // 
+  int regNumForIntArg(bool inCallee, bool isVarArgsCall,
+                      unsigned argNo, unsigned& regClassId) const;
+
+  int regNumForFPArg(unsigned RegType, bool inCallee, bool isVarArgsCall,
+                     unsigned argNo, unsigned& regClassId) const;
+  
   // The following methods are used to color special live ranges (e.g.
   // function args and return values etc.) with specific hardware registers
   // as required. See SparcRegInfo.cpp for the implementation for Sparc.
@@ -458,14 +445,14 @@ public:
     return MachineRegClassArr[RegClassID]->isRegVolatile(Reg);
   }
 
+  // Get the register type for a register identified in different ways.
+  int getRegType(const Type* type) const;
+  int getRegType(const LiveRange *LR) const;
+  int getRegType(int unifiedRegNum) const;
 
   virtual unsigned getFramePointer() const;
   virtual unsigned getStackPointer() const;
 
-  virtual int getInvalidRegNum() const {
-    return InvalidRegNum;
-  }
-
   // This method inserts the caller saving code for call instructions
   //
   void insertCallerSavingCode(std::vector<MachineInstr*>& instrnsBefore,
diff --git a/lib/Target/SparcV9/SparcV9RegInfo.cpp b/lib/Target/SparcV9/SparcV9RegInfo.cpp
index 2f86a2ff1b6..141f22667d6 100644
--- a/lib/Target/SparcV9/SparcV9RegInfo.cpp
+++ b/lib/Target/SparcV9/SparcV9RegInfo.cpp
@@ -25,9 +25,8 @@ enum {
 };
 
 UltraSparcRegInfo::UltraSparcRegInfo(const UltraSparc &tgt)
-  : TargetRegInfo(tgt), NumOfIntArgRegs(6), 
-    NumOfFloatArgRegs(32), InvalidRegNum(1000) {
-   
+  : TargetRegInfo(tgt), NumOfIntArgRegs(6), NumOfFloatArgRegs(32)
+{
   MachineRegClassArr.push_back(new SparcIntRegClass(IntRegClassID));
   MachineRegClassArr.push_back(new SparcFloatRegClass(FloatRegClassID));
   MachineRegClassArr.push_back(new SparcIntCCRegClass(IntCCRegClassID));
@@ -157,56 +156,48 @@ isVarArgsCall(const MachineInstr *CallMI) {
 }
 
 
-// Get the register number for the specified integer arg#,
-// assuming there are argNum total args, intArgNum int args,
-// and fpArgNum FP args preceding (and not including) this one.
-// Use INT regs for FP args if this is a varargs call.
+// Get the register number for the specified integer argument #argNo.
 // 
 // Return value:
-//      InvalidRegNum,  if there is no int register available for the arg. 
-//      regNum,         otherwise (this is NOT the unified reg. num).
+//      getInvalidRegNum(),  if there is no int register available for the arg. 
+//      regNum,              otherwise (this is NOT the unified reg. num).
+//                           regClassId is set to the register class ID.
 // 
-inline int
+int
 UltraSparcRegInfo::regNumForIntArg(bool inCallee, bool isVarArgsCall,
-                                   unsigned argNo,
-                                   unsigned intArgNo, unsigned fpArgNo,
-                                   unsigned& regClassId) const
+                                   unsigned argNo, unsigned& regClassId) const
 {
   regClassId = IntRegClassID;
   if (argNo >= NumOfIntArgRegs)
-    return InvalidRegNum;
+    return getInvalidRegNum();
   else
     return argNo + (inCallee? SparcIntRegClass::i0 : SparcIntRegClass::o0);
 }
 
-// Get the register number for the specified FP arg#,
-// assuming there are argNum total args, intArgNum int args,
-// and fpArgNum FP args preceding (and not including) this one.
+// Get the register number for the specified FP argument #argNo,
 // Use INT regs for FP args if this is a varargs call.
 // 
 // Return value:
-//      InvalidRegNum,  if there is no int register available for the arg. 
-//      regNum,         otherwise (this is NOT the unified reg. num).
+//      getInvalidRegNum(),  if there is no register available for the arg.
+//      regNum,              otherwise (this is NOT the unified reg. num).
+//                           regClassId is set to the register class ID.
 // 
-inline int
+int
 UltraSparcRegInfo::regNumForFPArg(unsigned regType,
                                   bool inCallee, bool isVarArgsCall,
-                                  unsigned argNo,
-                                  unsigned intArgNo, unsigned fpArgNo,
-                                  unsigned& regClassId) const
+                                  unsigned argNo, unsigned& regClassId) const
 {
   if (isVarArgsCall)
-    return regNumForIntArg(inCallee, isVarArgsCall, argNo, intArgNo, fpArgNo,
-                           regClassId);
+    return regNumForIntArg(inCallee, isVarArgsCall, argNo, regClassId);
   else
     {
       regClassId = FloatRegClassID;
       if (regType == FPSingleRegType)
         return (argNo*2+1 >= NumOfFloatArgRegs)?
-          InvalidRegNum : SparcFloatRegClass::f0 + (argNo * 2 + 1);
+          getInvalidRegNum() : SparcFloatRegClass::f0 + (argNo * 2 + 1);
       else if (regType == FPDoubleRegType)
         return (argNo*2 >= NumOfFloatArgRegs)?
-          InvalidRegNum : SparcFloatRegClass::f0 + (argNo * 2);
+          getInvalidRegNum() : SparcFloatRegClass::f0 + (argNo * 2);
       else
         assert(0 && "Illegal FP register type");
 	return 0;
@@ -379,11 +370,11 @@ void UltraSparcRegInfo::suggestRegs4MethodArgs(const Function *Meth,
     
     int regNum = (regType == IntRegType)
       ? regNumForIntArg(/*inCallee*/ true, isVarArgs,
-                        argNo, intArgNo++, fpArgNo, regClassIDOfArgReg)
+                        argNo, regClassIDOfArgReg)
       : regNumForFPArg(regType, /*inCallee*/ true, isVarArgs,
-                       argNo, intArgNo, fpArgNo++, regClassIDOfArgReg); 
+                       argNo, regClassIDOfArgReg); 
     
-    if(regNum != InvalidRegNum)
+    if(regNum != getInvalidRegNum())
       LR->setSuggestedColor(regNum);
   }
 }
@@ -418,16 +409,16 @@ void UltraSparcRegInfo::colorMethodArgs(const Function *Meth,
     // Also find the correct register the argument must use (UniArgReg)
     //
     bool isArgInReg = false;
-    unsigned UniArgReg = InvalidRegNum;	// reg that LR MUST be colored with
+    unsigned UniArgReg = getInvalidRegNum(); // reg that LR MUST be colored with
     unsigned regClassIDOfArgReg = BadRegClass; // reg class of chosen reg
     
     int regNum = (regType == IntRegType)
       ? regNumForIntArg(/*inCallee*/ true, isVarArgs,
-                        argNo, intArgNo++, fpArgNo, regClassIDOfArgReg)
+                        argNo, regClassIDOfArgReg)
       : regNumForFPArg(regType, /*inCallee*/ true, isVarArgs,
-                       argNo, intArgNo, fpArgNo++, regClassIDOfArgReg);
+                       argNo, regClassIDOfArgReg);
     
-    if(regNum != InvalidRegNum) {
+    if(regNum != getInvalidRegNum()) {
       isArgInReg = true;
       UniArgReg = getUnifiedRegNum( regClassIDOfArgReg, regNum);
     }
@@ -482,7 +473,17 @@ void UltraSparcRegInfo::colorMethodArgs(const Function *Meth,
 	int offsetFromFP =
           frameInfo.getIncomingArgOffset(MachineFunction::get(Meth),
                                          argNo);
-        
+
+        // float arguments on stack are right justified so adjust the offset!
+        // int arguments are also right justified but they are always loaded as
+        // a full double-word so the offset does not need to be adjusted.
+        if (regType == FPSingleRegType) {
+          unsigned argSize = target.getTargetData().getTypeSize(LR->getType());
+          unsigned slotSize = frameInfo.getSizeOfEachArgOnStack();
+          assert(argSize <= slotSize && "Insufficient slot size!");
+          offsetFromFP += slotSize - argSize;
+        }
+
 	cpMem2RegMI(FirstAI->InstrnsBefore,
                     getFramePointer(), offsetFromFP, UniLRReg, regType);
       }
@@ -525,11 +526,21 @@ void UltraSparcRegInfo::colorMethodArgs(const Function *Meth,
 	// can simply change the stack position of the LR. We can do this,
 	// since this method is called before any other method that makes
 	// uses of the stack pos of the LR (e.g., updateMachineInstr)
-
+        // 
         const TargetFrameInfo& frameInfo = target.getFrameInfo();
 	int offsetFromFP =
           frameInfo.getIncomingArgOffset(MachineFunction::get(Meth),
                                          argNo);
+
+        // FP arguments on stack are right justified so adjust offset!
+        // int arguments are also right justified but they are always loaded as
+        // a full double-word so the offset does not need to be adjusted.
+        if (regType == FPSingleRegType) {
+          unsigned argSize = target.getTargetData().getTypeSize(LR->getType());
+          unsigned slotSize = frameInfo.getSizeOfEachArgOnStack();
+          assert(argSize <= slotSize && "Insufficient slot size!");
+          offsetFromFP += slotSize - argSize;
+        }
         
 	LR->modifySpillOffFromFP( offsetFromFP );
       }
@@ -586,11 +597,11 @@ void UltraSparcRegInfo::suggestRegs4CallArgs(MachineInstr *CallMI,
     
     // get the LR of call operand (parameter)
     LiveRange *const LR = LRI.getLiveRangeForValue(CallArg); 
-    assert (LR && "Must have a LR for all arguments since "
-                  "all args (even consts) must be defined before");
+    if (!LR)
+      continue;                    // no live ranges for constants and labels
 
     unsigned regType = getRegType(LR);
-    unsigned regClassIDOfArgReg = BadRegClass; // reg class of chosen reg (unused)
+    unsigned regClassIDOfArgReg = BadRegClass; // chosen reg class (unused)
 
     // Choose a register for this arg depending on whether it is
     // an INT or FP value.  Here we ignore whether or not it is a
@@ -598,15 +609,16 @@ void UltraSparcRegInfo::suggestRegs4CallArgs(MachineInstr *CallMI,
     // to an integer Value and handled under (argCopy != NULL) below.
     int regNum = (regType == IntRegType)
       ? regNumForIntArg(/*inCallee*/ false, /*isVarArgs*/ false,
-                        argNo, intArgNo++, fpArgNo, regClassIDOfArgReg)
+                        argNo, regClassIDOfArgReg)
       : regNumForFPArg(regType, /*inCallee*/ false, /*isVarArgs*/ false,
-                       argNo, intArgNo, fpArgNo++, regClassIDOfArgReg); 
+                       argNo, regClassIDOfArgReg); 
     
     // If a register could be allocated, use it.
     // If not, do NOTHING as this will be colored as a normal value.
-    if(regNum != InvalidRegNum)
+    if(regNum != getInvalidRegNum())
       LR->setSuggestedColor(regNum);
     
+#ifdef CANNOT_PRECOPY_CALLARGS
     // Repeat for the second copy of the argument, which would be
     // an FP argument being passed to a function with no prototype
     const Value *argCopy = argDesc->getArgInfo(i).getArgCopy();
@@ -615,12 +627,12 @@ void UltraSparcRegInfo::suggestRegs4CallArgs(MachineInstr *CallMI,
         assert(regType != IntRegType && argCopy->getType()->isInteger()
                && "Must be passing copy of FP argument in int register");
         int copyRegNum = regNumForIntArg(/*inCallee*/false, /*isVarArgs*/false,
-                                         argNo, intArgNo, fpArgNo-1,
-                                         regClassIDOfArgReg);
-        assert(copyRegNum != InvalidRegNum); 
+                                         argNo, regClassIDOfArgReg);
+        assert(copyRegNum != getInvalidRegNum()); 
         LiveRange *const copyLR = LRI.getLiveRangeForValue(argCopy); 
         copyLR->setSuggestedColor(copyRegNum);
       }
+#endif
     
   } // for all call arguments
 
@@ -640,14 +652,15 @@ UltraSparcRegInfo::InitializeOutgoingArg(MachineInstr* CallMI,
                              std::vector<MachineInstr*> &AddedInstrnsBefore)
   const
 {
+  assert(0 && "Should never get here because we are now using precopying!");
+
   MachineInstr *AdMI;
   bool isArgInReg = false;
   unsigned UniArgReg = BadRegClass;          // unused unless initialized below
-  if (UniArgRegOrNone != InvalidRegNum)
+  if (UniArgRegOrNone != getInvalidRegNum())
     {
       isArgInReg = true;
       UniArgReg = (unsigned) UniArgRegOrNone;
-      CallMI->insertUsedReg(UniArgReg); // mark the reg as used
     }
   
   if (LR->hasColor()) {
@@ -748,35 +761,24 @@ void UltraSparcRegInfo::colorCallArgs(MachineInstr *CallMI,
 
   if (RetVal) {
     LiveRange *RetValLR = LRI.getLiveRangeForValue( RetVal );
+    assert(RetValLR && "ERROR: No LR for non-void return value");
 
-    if (!RetValLR) {
-      std::cerr << "\nNo LR for:" << RAV(RetVal) << "\n";
-      assert(RetValLR && "ERR:No LR for non-void return value");
-    }
-
+    // Mark the return value register as used by this instruction
     unsigned RegClassID = RetValLR->getRegClassID();
-    bool recvCorrectColor;
-    unsigned CorrectCol;                // correct color for ret value
-    unsigned UniRetReg;                 // unified number for CorrectCol
+    unsigned CorrectCol = (RegClassID == IntRegClassID
+                           ? (unsigned) SparcIntRegClass::o0
+                           : (unsigned) SparcFloatRegClass::f0);
     
-    if(RegClassID == IntRegClassID)
-      CorrectCol = SparcIntRegClass::o0;
-    else if(RegClassID == FloatRegClassID)
-      CorrectCol = SparcFloatRegClass::f0;
-    else {
-      assert( 0 && "Unknown RegClass");
-      return;
-    }
+    CallMI->insertUsedReg(getUnifiedRegNum(RegClassID, CorrectCol));	
     
-    // convert to unified number
-    UniRetReg = getUnifiedRegNum(RegClassID, CorrectCol);	
+#ifdef CANNOT_PRECOPY_CALLARGS
+    // unified number for CorrectCol
+    unsigned UniRetReg = getUnifiedRegNum(RegClassID, CorrectCol);
+    recvCorrectColor;
 
-    // Mark the register as used by this instruction
-    CallMI->insertUsedReg(UniRetReg);
-    
     // if the LR received the correct color, NOTHING to do
-    recvCorrectColor = RetValLR->hasColor()? RetValLR->getColor() == CorrectCol
-      : false;
+    bool recvCorrectColor = (RetValLR->hasColor()
+                             ? RetValLR->getColor() == CorrectCol : false);
     
     // if we didn't receive the correct color for some reason, 
     // put copy instruction
@@ -802,8 +804,8 @@ void UltraSparcRegInfo::colorCallArgs(MachineInstr *CallMI,
         cpReg2MemMI(CallAI->InstrnsAfter, UniRetReg,
                     getFramePointer(),RetValLR->getSpillOffFromFP(), regType);
       }
-
     } // the LR didn't receive the suggested color  
+#endif
     
   } // if there a return value
   
@@ -818,56 +820,62 @@ void UltraSparcRegInfo::colorCallArgs(MachineInstr *CallMI,
   
   for(unsigned argNo=0, i=0, intArgNo=0, fpArgNo=0;
       i < NumOfCallArgs; ++i, ++argNo) {    
-
+    
     const Value *CallArg = argDesc->getArgInfo(i).getArgVal();
-    
-    // get the LR of call operand (parameter)
-    LiveRange *const LR = LRI.getLiveRangeForValue(CallArg); 
-
-    unsigned RegClassID = getRegClassIDOfType(CallArg->getType());
     unsigned regType = getRegType(CallArg->getType());
-    
+
     // Find whether this argument is coming in a register (if not, on stack)
     // Also find the correct register the argument must use (UniArgReg)
     //
     bool isArgInReg = false;
-    unsigned UniArgReg = InvalidRegNum;	  // reg that LR MUST be colored with
+    int UniArgReg = getInvalidRegNum();	  // reg that LR MUST be colored with
     unsigned regClassIDOfArgReg = BadRegClass; // reg class of chosen reg
     
     // Find the register that must be used for this arg, depending on
     // whether it is an INT or FP value.  Here we ignore whether or not it
     // is a varargs calls, because FP arguments will be explicitly copied
     // to an integer Value and handled under (argCopy != NULL) below.
+    // 
     int regNum = (regType == IntRegType)
       ? regNumForIntArg(/*inCallee*/ false, /*isVarArgs*/ false,
-                        argNo, intArgNo++, fpArgNo, regClassIDOfArgReg)
+                        argNo, regClassIDOfArgReg)
       : regNumForFPArg(regType, /*inCallee*/ false, /*isVarArgs*/ false,
-                       argNo, intArgNo, fpArgNo++, regClassIDOfArgReg); 
+                       argNo, regClassIDOfArgReg); 
     
-    if(regNum != InvalidRegNum) {
+    if (regNum != getInvalidRegNum()) {
       isArgInReg = true;
-      UniArgReg = getUnifiedRegNum( regClassIDOfArgReg, regNum);
+      UniArgReg = getUnifiedRegNum(regClassIDOfArgReg, regNum);
+      CallMI->insertUsedReg(UniArgReg);         // mark the reg as used
+    }
+
+#ifdef CANNOT_PRECOPY_CALLARGS
+    
+    // Get the LR of call operand (parameter).  There must be one because
+    // all args (even constants) must be defined before.
+    LiveRange *const LR = LRI.getLiveRangeForValue(CallArg); 
+    assert(LR && "NO LR for call arg");  
+
+    unsigned RegClassID = getRegClassIDOfType(CallArg->getType());
+
+    if (regNum != getInvalidRegNum()) {
       assert(regClassIDOfArgReg == RegClassID &&
              "Moving values between reg classes must happen during selection");
     }
     
-    // not possible to have a null LR since all args (even consts)  
-    // must be defined before
-    if (!LR) {          
-      std::cerr <<" ERROR: In call instr, no LR for arg: " <<RAV(CallArg)<<"\n";
-      assert(LR && "NO LR for call arg");  
-    }
-    
     InitializeOutgoingArg(CallMI, CallAI, PRA, LR, regType, RegClassID,
                           UniArgReg, argNo, AddedInstrnsBefore);
+#endif
     
     // Repeat for the second copy of the argument, which would be
     // an FP argument being passed to a function with no prototype.
     // It may either be passed as a copy in an integer register
     // (in argCopy), or on the stack (useStackSlot).
-    const Value *argCopy = argDesc->getArgInfo(i).getArgCopy();
-    if (argCopy != NULL)
+    int argCopyReg = argDesc->getArgInfo(i).getArgCopy();
+    if (argCopyReg != TargetRegInfo::getInvalidRegNum())
       {
+        CallMI->insertUsedReg(argCopyReg); // mark the reg as used
+
+#ifdef CANNOT_PRECOPY_CALLARGS
         assert(regType != IntRegType && argCopy->getType()->isInteger()
                && "Must be passing copy of FP argument in int register");
         
@@ -875,9 +883,8 @@ void UltraSparcRegInfo::colorCallArgs(MachineInstr *CallMI,
         unsigned copyRegType = getRegType(argCopy->getType());
         
         int copyRegNum = regNumForIntArg(/*inCallee*/false, /*isVarArgs*/false,
-                                         argNo, intArgNo, fpArgNo-1,
-                                         regClassIDOfArgReg);
-        assert(copyRegNum != InvalidRegNum); 
+                                         argNo, regClassIDOfArgReg);
+        assert(copyRegNum != getInvalidRegNum()); 
         assert(regClassIDOfArgReg == copyRegClassID &&
            "Moving values between reg classes must happen during selection");
         
@@ -885,17 +892,20 @@ void UltraSparcRegInfo::colorCallArgs(MachineInstr *CallMI,
                               LRI.getLiveRangeForValue(argCopy), copyRegType,
                               copyRegClassID, copyRegNum, argNo,
                               AddedInstrnsBefore);
+#endif
       }
     
-    if (regNum != InvalidRegNum &&
+#ifdef CANNOT_PRECOPY_CALLARGS
+    if (regNum != getInvalidRegNum() &&
         argDesc->getArgInfo(i).usesStackSlot())
       {
         // Pass the argument via the stack in addition to regNum
         assert(regType != IntRegType && "Passing an integer arg. twice?");
         assert(!argCopy && "Passing FP arg in FP reg, INT reg, and stack?");
         InitializeOutgoingArg(CallMI, CallAI, PRA, LR, regType, RegClassID,
-                              InvalidRegNum, argNo, AddedInstrnsBefore);
+                              getInvalidRegNum(), argNo, AddedInstrnsBefore);
       }
+#endif
   }  // for each parameter in call instruction
 
   // If we added any instruction before the call instruction, verify
@@ -943,25 +953,15 @@ void UltraSparcRegInfo::suggestReg4RetValue(MachineInstr *RetMI,
 
   suggestReg4RetAddr(RetMI, LRI);
 
-  // if there is an implicit ref, that has to be the ret value
-  if(  RetMI->getNumImplicitRefs() > 0 ) {
-
-    // The first implicit operand is the return value of a return instr
-    const Value *RetVal =  RetMI->getImplicitRef(0);
-
-    LiveRange *const LR = LRI.getLiveRangeForValue( RetVal ); 
-
-    if (!LR) {
-      std::cerr << "\nNo LR for:" << RAV(RetVal) << "\n";
-      assert(0 && "No LR for return value of non-void method");
-    }
-
-    unsigned RegClassID = LR->getRegClassID();
-    if (RegClassID == IntRegClassID) 
-      LR->setSuggestedColor(SparcIntRegClass::i0);
-    else if (RegClassID == FloatRegClassID) 
-      LR->setSuggestedColor(SparcFloatRegClass::f0);
-  }
+  // To find the return value (if any), we can get the LLVM return instr.
+  // from the return address register, which is the first operand
+  Value* tmpI = RetMI->getOperand(0).getVRegValue();
+  ReturnInst* retI=cast<ReturnInst>(cast<TmpInstruction>(tmpI)->getOperand(0));
+  if (const Value *RetVal = retI->getReturnValue())
+    if (LiveRange *const LR = LRI.getLiveRangeForValue(RetVal))
+      LR->setSuggestedColor(LR->getRegClassID() == IntRegClassID
+                            ? (unsigned) SparcIntRegClass::i0
+                            : (unsigned) SparcFloatRegClass::f0);
 }
 
 
@@ -978,47 +978,34 @@ void UltraSparcRegInfo::colorRetValue(MachineInstr *RetMI,
 
   assert((target.getInstrInfo()).isReturn( RetMI->getOpCode()));
 
-  // if there is an implicit ref, that has to be the ret value
-  if(RetMI->getNumImplicitRefs() > 0) {
-
-    // The first implicit operand is the return value of a return instr
-    const Value *RetVal =  RetMI->getImplicitRef(0);
-
-    LiveRange *LR = LRI.getLiveRangeForValue(RetVal); 
-
-    if (!LR) {
-      std::cerr << "\nNo LR for:" << RAV(RetVal) << "\n";
-      // assert( LR && "No LR for return value of non-void method");
-      return;
-    }
+  // To find the return value (if any), we can get the LLVM return instr.
+  // from the return address register, which is the first operand
+  Value* tmpI = RetMI->getOperand(0).getVRegValue();
+  ReturnInst* retI=cast<ReturnInst>(cast<TmpInstruction>(tmpI)->getOperand(0));
+  if (const Value *RetVal = retI->getReturnValue()) {
 
     unsigned RegClassID = getRegClassIDOfType(RetVal->getType());
     unsigned regType = getRegType(RetVal->getType());
-
-    unsigned CorrectCol;
-    if(RegClassID == IntRegClassID)
-      CorrectCol = SparcIntRegClass::i0;
-    else if(RegClassID == FloatRegClassID)
-      CorrectCol = SparcFloatRegClass::f0;
-    else {
-      assert (0 && "Unknown RegClass");
-      return;
-    }
+    unsigned CorrectCol = (RegClassID == IntRegClassID
+                           ? (unsigned) SparcIntRegClass::i0
+                           : (unsigned) SparcFloatRegClass::f0);
 
     // convert to unified number
     unsigned UniRetReg = getUnifiedRegNum(RegClassID, CorrectCol);
 
     // Mark the register as used by this instruction
     RetMI->insertUsedReg(UniRetReg);
-    
-    // if the LR received the correct color, NOTHING to do
-    
-    if (LR->hasColor() && LR->getColor() == CorrectCol)
-      return;
-    
-    if (LR->hasColor()) {
 
-      // We are here because the LR was allocted a regiter
+#ifdef CANNOT_PRECOPY_CALLARGS
+    LiveRange *LR = LRI.getLiveRangeForValue(RetVal); 
+    assert(LR && "No LR for return value of non-void method?");
+
+    if (LR->hasColor()) {
+      // if the LR received the correct color, NOTHING to do
+      if (LR->getColor() == CorrectCol)
+        return;
+    
+      // We are here because the LR was allocated a register
       // It may be the suggested register or not
 
       // copy the LR of retun value to i0 or f0
@@ -1034,6 +1021,7 @@ void UltraSparcRegInfo::colorRetValue(MachineInstr *RetMI,
                   LR->getSpillOffFromFP(), UniRetReg, regType);
       //std::cerr << "\nCopied the return value from stack\n";
     }
+#endif
   
   } // if there is a return value
 
@@ -1070,7 +1058,7 @@ UltraSparcRegInfo::cpReg2RegMI(std::vector<MachineInstr*>& mvec,
                                unsigned SrcReg,
                                unsigned DestReg,
                                int RegType) const {
-  assert( ((int)SrcReg != InvalidRegNum) && ((int)DestReg != InvalidRegNum) &&
+  assert( ((int)SrcReg != getInvalidRegNum()) && ((int)DestReg != getInvalidRegNum()) &&
 	  "Invalid Register");
   
   MachineInstr * MI = NULL;
@@ -1297,18 +1285,24 @@ UltraSparcRegInfo::insertCallerSavingCode
 
   CallArgsDescriptor* argDesc = CallArgsDescriptor::get(CallMI);
   
-  // Now find the LR of the return value of the call
-  // The last *implicit operand* is the return value of a call
-  // Insert it to to he PushedRegSet since we must not save that register
+  // Now check if the call has a return value (using argDesc) and if so,
+  // find the LR of the TmpInstruction representing the return value register
+  // (using the last or second-last *implicit operand* of the call MI).
+  // Insert it into the PushedRegSet since we must not save that register
   // and restore it after the call.
   // We do this because, we look at the LV set *after* the instruction
   // to determine, which LRs must be saved across calls. The return value
   // of the call is live in this set - but we must not save/restore it.
-
-  const Value *RetVal = argDesc->getReturnValue();
-
-  if (RetVal) {
-    LiveRange *RetValLR = PRA.LRI.getLiveRangeForValue( RetVal );
+  // 
+  if (const Value *origRetVal = argDesc->getReturnValue()) {
+    unsigned retValRefNum = (CallMI->getNumImplicitRefs() -
+                             (argDesc->getIndirectFuncPtr()? 1 : 2));
+    const TmpInstruction* tmpRetVal =
+      cast<TmpInstruction>(CallMI->getImplicitRef(retValRefNum));
+    assert(tmpRetVal->getOperand(0) == origRetVal &&
+           tmpRetVal->getType() == origRetVal->getType() &&
+           "Wrong implicit ref?");
+    LiveRange *RetValLR = PRA.LRI.getLiveRangeForValue( tmpRetVal );
     assert(RetValLR && "No LR for RetValue of call");
 
     if (RetValLR->hasColor())
@@ -1351,8 +1345,8 @@ UltraSparcRegInfo::insertCallerSavingCode
 	    // and add them to InstrnsBefore and InstrnsAfter of the
 	    // call instruction
             // 
-	    int StackOff = 
-	      PRA.MF.getInfo()->pushTempValue(getSpilledRegSize(RegType));
+	    int StackOff =
+              PRA.MF.getInfo()->pushTempValue(getSpilledRegSize(RegType));
             
 	    std::vector<MachineInstr*> AdIBef, AdIAft;
             
@@ -1361,7 +1355,9 @@ UltraSparcRegInfo::insertCallerSavingCode
             // We may need a scratch register to copy the saved value
             // to/from memory.  This may itself have to insert code to
             // free up a scratch register.  Any such code should go before
-            // the save code.
+            // the save code.  The scratch register, if any, is by default
+            // temporary and not "used" by the instruction unless the
+            // copy code itself decides to keep the value in the scratch reg.
             int scratchRegType = -1;
             int scratchReg = -1;
             if (regTypeNeedsScratchReg(RegType, scratchRegType))
@@ -1371,7 +1367,6 @@ UltraSparcRegInfo::insertCallerSavingCode
                 scratchReg = PRA.getUsableUniRegAtMI(scratchRegType, &LVSetBef,
                                                    CallMI, AdIBef, AdIAft);
                 assert(scratchReg != getInvalidRegNum());
-                CallMI->insertUsedReg(scratchReg); 
               }
             
             if (AdIBef.size() > 0)
@@ -1390,7 +1385,7 @@ UltraSparcRegInfo::insertCallerSavingCode
             // We may need a scratch register to copy the saved value
             // from memory.  This may itself have to insert code to
             // free up a scratch register.  Any such code should go
-            // after the save code.
+            // after the save code.  As above, scratch is not marked "used".
             // 
             scratchRegType = -1;
             scratchReg = -1;
@@ -1399,7 +1394,6 @@ UltraSparcRegInfo::insertCallerSavingCode
                 scratchReg = PRA.getUsableUniRegAtMI(scratchRegType, &LVSetAft,
                                                  CallMI, AdIBef, AdIAft);
                 assert(scratchReg != getInvalidRegNum());
-                CallMI->insertUsedReg(scratchReg); 
               }
             
             if (AdIBef.size() > 0)