diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp
index 7394d8f2e1c..1df8cb8ca11 100644
--- a/lib/Target/X86/InstSelectSimple.cpp
+++ b/lib/Target/X86/InstSelectSimple.cpp
@@ -1207,7 +1207,7 @@ void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI,
     return;
   case cInt:
   case cShort:
-    BMI(BB, MBBI, Class == cInt ? X86::IMULr32 : X86::IMULr16, 2, DestReg)
+    BMI(BB, MBBI, Class == cInt ? X86::IMULrr32 : X86::IMULrr16, 2, DestReg)
       .addReg(op0Reg).addReg(op1Reg);
     return;
   case cByte:
@@ -1255,6 +1255,14 @@ void ISel::doMultiplyConst(MachineBasicBlock *MBB,
       return;
     }
   }
+  
+  if (Class == cShort) {
+    BMI(MBB, IP, X86::IMULri16, 2, DestReg).addReg(op0Reg).addZImm(ConstRHS);
+    return;
+  } else if (Class == cInt) {
+    BMI(MBB, IP, X86::IMULri32, 2, DestReg).addReg(op0Reg).addZImm(ConstRHS);
+    return;
+  }
 
   // Most general case, emit a normal multiply...
   static const unsigned MOVirTab[] = {
@@ -1301,7 +1309,7 @@ void ISel::visitMul(BinaryOperator &I) {
 
     MachineBasicBlock::iterator MBBI = BB->end();
     unsigned AHBLReg = makeAnotherReg(Type::UIntTy);   // AH*BL
-    BMI(BB, MBBI, X86::IMULr32, 2, AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
+    BMI(BB, MBBI, X86::IMULrr32, 2, AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
 
     unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
     BuildMI(BB, X86::ADDrr32, 2,                         // AH*BL+(AL*BL >> 32)
@@ -1309,7 +1317,7 @@ void ISel::visitMul(BinaryOperator &I) {
     
     MBBI = BB->end();
     unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
-    BMI(BB, MBBI, X86::IMULr32, 2, ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
+    BMI(BB, MBBI, X86::IMULrr32, 2, ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
     
     BuildMI(BB, X86::ADDrr32, 2,               // AL*BH + AH*BL + (AL*BL >> 32)
 	    DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
diff --git a/lib/Target/X86/Printer.cpp b/lib/Target/X86/Printer.cpp
index aaebb5b8492..b2e656a5322 100644
--- a/lib/Target/X86/Printer.cpp
+++ b/lib/Target/X86/Printer.cpp
@@ -722,19 +722,24 @@ void Printer::printMachineInstruction(const MachineInstr *MI) {
   }
 
   case X86II::MRMSrcReg: {
-    // There is a two forms that are acceptable for MRMSrcReg instructions,
+    // There are three forms that are acceptable for MRMSrcReg instructions,
     // those with 3 and 2 operands:
     //
     // 3 Operands: in this form, the last register (the second input) is the
     // ModR/M input.  The first two operands should be the same, post register
     // allocation.  This is for things like: add r32, r/m32
     //
+    // 3 Operands: in this form, we can have 'INST R, R, imm', which is used for
+    // instructions like the IMULri instructions.
+    //
     // 2 Operands: this is for things like mov that do not read a second input
     //
     assert(MI->getOperand(0).isRegister() &&
            MI->getOperand(1).isRegister() &&
            (MI->getNumOperands() == 2 || 
-            (MI->getNumOperands() == 3 && MI->getOperand(2).isRegister()))
+            (MI->getNumOperands() == 3 && 
+             (MI->getOperand(2).isRegister() ||
+              MI->getOperand(2).isImmediate())))
            && "Bad format for MRMSrcReg!");
     if (MI->getNumOperands() == 3 &&
         MI->getOperand(0).getReg() != MI->getOperand(1).getReg())
@@ -742,6 +747,13 @@ void Printer::printMachineInstruction(const MachineInstr *MI) {
 
     O << TII.getName(MI->getOpCode()) << " ";
     printOp(MI->getOperand(0));
+
+    // If this is an IMULri* instruction, print the non-two-address operand.
+    if (MI->getNumOperands() == 3 && MI->getOperand(2).isImmediate()) {
+      O << ", ";
+      printOp(MI->getOperand(1));
+    }
+
     O << ", ";
     printOp(MI->getOperand(MI->getNumOperands()-1));
     O << "\n";
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index aaebb5b8492..b2e656a5322 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -722,19 +722,24 @@ void Printer::printMachineInstruction(const MachineInstr *MI) {
   }
 
   case X86II::MRMSrcReg: {
-    // There is a two forms that are acceptable for MRMSrcReg instructions,
+    // There are three forms that are acceptable for MRMSrcReg instructions,
     // those with 3 and 2 operands:
     //
     // 3 Operands: in this form, the last register (the second input) is the
     // ModR/M input.  The first two operands should be the same, post register
     // allocation.  This is for things like: add r32, r/m32
     //
+    // 3 Operands: in this form, we can have 'INST R, R, imm', which is used for
+    // instructions like the IMULri instructions.
+    //
     // 2 Operands: this is for things like mov that do not read a second input
     //
     assert(MI->getOperand(0).isRegister() &&
            MI->getOperand(1).isRegister() &&
            (MI->getNumOperands() == 2 || 
-            (MI->getNumOperands() == 3 && MI->getOperand(2).isRegister()))
+            (MI->getNumOperands() == 3 && 
+             (MI->getOperand(2).isRegister() ||
+              MI->getOperand(2).isImmediate())))
            && "Bad format for MRMSrcReg!");
     if (MI->getNumOperands() == 3 &&
         MI->getOperand(0).getReg() != MI->getOperand(1).getReg())
@@ -742,6 +747,13 @@ void Printer::printMachineInstruction(const MachineInstr *MI) {
 
     O << TII.getName(MI->getOpCode()) << " ";
     printOp(MI->getOperand(0));
+
+    // If this is an IMULri* instruction, print the non-two-address operand.
+    if (MI->getNumOperands() == 3 && MI->getOperand(2).isImmediate()) {
+      O << ", ";
+      printOp(MI->getOperand(1));
+    }
+
     O << ", ";
     printOp(MI->getOperand(MI->getNumOperands()-1));
     O << "\n";
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index aaac24848c5..f3fd142ed86 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -12,7 +12,7 @@
 #include "llvm/CodeGen/MachineCodeEmitter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Value.h"
+#include "llvm/Function.h"
 #include "Support/Debug.h"
 #include "Support/Statistic.h"
 #include "Config/alloca.h"
@@ -243,15 +243,12 @@ void Emitter::emitGlobalAddressForCall(GlobalValue *GV) {
   // Get the address from the backend...
   unsigned Address = MCE.getGlobalValueAddress(GV);
   
-  // If the machine code emitter doesn't know what the address IS yet, we have
-  // to take special measures.
-  //
   if (Address == 0) {
     // FIXME: this is JIT specific!
     if (TheJITResolver == 0)
       TheJITResolver = new JITResolver(MCE);
     Address = TheJITResolver->addFunctionReference(MCE.getCurrentPCValue(),
-                                                   (Function*)GV);
+                                                   cast<Function>(GV));
   }
   emitMaybePCRelativeValue(Address, true);
 }
@@ -536,8 +533,19 @@ void Emitter::emitInstruction(MachineInstr &MI) {
 
   case X86II::MRMSrcReg:
     MCE.emitByte(BaseOpcode);
-    emitRegModRMByte(MI.getOperand(MI.getNumOperands()-1).getReg(),
-                     getX86RegNum(MI.getOperand(0).getReg()));
+
+    if (MI.getNumOperands() == 2) {
+      emitRegModRMByte(MI.getOperand(MI.getNumOperands()-1).getReg(),
+                       getX86RegNum(MI.getOperand(0).getReg()));
+    } else if (MI.getOperand(2).isImmediate()) {
+      emitRegModRMByte(MI.getOperand(1).getReg(),
+                       getX86RegNum(MI.getOperand(0).getReg()));
+
+      emitConstant(MI.getOperand(2).getImmedValue(), sizeOfPtr(Desc));
+    } else {
+      emitRegModRMByte(MI.getOperand(2).getReg(),
+                       getX86RegNum(MI.getOperand(0).getReg()));
+    }
     break;
 
   case X86II::MRMSrcMem:
diff --git a/lib/Target/X86/X86ISelSimple.cpp b/lib/Target/X86/X86ISelSimple.cpp
index 7394d8f2e1c..1df8cb8ca11 100644
--- a/lib/Target/X86/X86ISelSimple.cpp
+++ b/lib/Target/X86/X86ISelSimple.cpp
@@ -1207,7 +1207,7 @@ void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI,
     return;
   case cInt:
   case cShort:
-    BMI(BB, MBBI, Class == cInt ? X86::IMULr32 : X86::IMULr16, 2, DestReg)
+    BMI(BB, MBBI, Class == cInt ? X86::IMULrr32 : X86::IMULrr16, 2, DestReg)
       .addReg(op0Reg).addReg(op1Reg);
     return;
   case cByte:
@@ -1255,6 +1255,14 @@ void ISel::doMultiplyConst(MachineBasicBlock *MBB,
       return;
     }
   }
+  
+  if (Class == cShort) {
+    BMI(MBB, IP, X86::IMULri16, 2, DestReg).addReg(op0Reg).addZImm(ConstRHS);
+    return;
+  } else if (Class == cInt) {
+    BMI(MBB, IP, X86::IMULri32, 2, DestReg).addReg(op0Reg).addZImm(ConstRHS);
+    return;
+  }
 
   // Most general case, emit a normal multiply...
   static const unsigned MOVirTab[] = {
@@ -1301,7 +1309,7 @@ void ISel::visitMul(BinaryOperator &I) {
 
     MachineBasicBlock::iterator MBBI = BB->end();
     unsigned AHBLReg = makeAnotherReg(Type::UIntTy);   // AH*BL
-    BMI(BB, MBBI, X86::IMULr32, 2, AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
+    BMI(BB, MBBI, X86::IMULrr32, 2, AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
 
     unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
     BuildMI(BB, X86::ADDrr32, 2,                         // AH*BL+(AL*BL >> 32)
@@ -1309,7 +1317,7 @@ void ISel::visitMul(BinaryOperator &I) {
     
     MBBI = BB->end();
     unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
-    BMI(BB, MBBI, X86::IMULr32, 2, ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
+    BMI(BB, MBBI, X86::IMULrr32, 2, ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
     
     BuildMI(BB, X86::ADDrr32, 2,               // AL*BH + AH*BL + (AL*BL >> 32)
 	    DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index c35164e2894..7c940ba43cc 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -255,8 +255,10 @@ def SUBri32  : I2A32<"sub", 0x81, MRMS5r    >,         Pattern<(set R32, (minus
 
 def SBBrr32  : I2A32<"sbb", 0x19, MRMDestReg>;                // R32 -= R32+Carry
 
-def IMULr16  : I2A16<"imul", 0xAF, MRMSrcReg>, TB, OpSize, Pattern<(set R16, (times R16, R16))>;
-def IMULr32  : I2A32<"imul", 0xAF, MRMSrcReg>, TB        , Pattern<(set R32, (times R32, R32))>;
+def IMULrr16 : I2A16<"imul", 0xAF, MRMSrcReg>, TB, OpSize, Pattern<(set R16, (times R16, R16))>;
+def IMULrr32 : I2A32<"imul", 0xAF, MRMSrcReg>, TB        , Pattern<(set R32, (times R32, R32))>;
+def IMULri16 : I2A16<"imul", 0x69, MRMSrcReg>,     OpSize;
+def IMULri32 : I2A32<"imul", 0x69, MRMSrcReg>;
 
 // Logical operators...
 def ANDrr8   : I2A8 <"and", 0x20, MRMDestReg>,         Pattern<(set R8 , (and R8 , R8 ))>;