diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 5fdcc696193..a08e8fd4583 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -31,16 +31,12 @@ though?  http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14224
 
 //===---------------------------------------------------------------------===//
 
-Need to add support for rotate instructions.
-
-//===---------------------------------------------------------------------===//
-
 Some targets (e.g. athlons) prefer freep to fstp ST(0):
 http://gcc.gnu.org/ml/gcc-patches/2004-04/msg00659.html
 
 //===---------------------------------------------------------------------===//
 
-This should use faddi on chips where it is profitable:
+This should use fiadd on chips where it is profitable:
 double foo(double P, int *I) { return P+*I; }
 
 //===---------------------------------------------------------------------===//
@@ -107,3 +103,12 @@ Should we promote i16 to i32 to avoid partial register update stalls?
 
 Leave any_extend as pseudo instruction and hint to register
 allocator. Delay codegen until post register allocation.
+
+//===---------------------------------------------------------------------===//
+
+Add a target-specific hook to the DAG combiner to handle SINT_TO_FP and
+FP_TO_SINT when the source operand is already in memory.
+
+//===---------------------------------------------------------------------===//
+
+Check if load folding would add a cycle in the DAG.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 321a9c360a5..4849c95f839 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -194,6 +194,11 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     // Set up the FP register classes.
     addRegisterClass(MVT::f64, X86::RFPRegisterClass);
 
+    if (X86DAGIsel) {
+      setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
+      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+    }
+
     if (!UnsafeFPMath) {
       setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
       setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
@@ -1404,22 +1409,30 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
   }
   case ISD::SINT_TO_FP: {
     assert(Op.getValueType() == MVT::f64 &&
-           Op.getOperand(0).getValueType() == MVT::i64 &&
+           Op.getOperand(0).getValueType() <= MVT::i64 &&
+           Op.getOperand(0).getValueType() >= MVT::i16 &&
            "Unknown SINT_TO_FP to lower!");
-    // We lower sint64->FP into a store to a temporary stack slot, followed by a
-    // FILD64m node.
+    // Lower sint->FP as a store to a temporary stack slot followed by a FILD.
+    SDOperand Result;
+    MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
+    unsigned Size = MVT::getSizeInBits(SrcVT)/8;
     MachineFunction &MF = DAG.getMachineFunction();
-    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+    int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
     SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
-    SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
-                           Op.getOperand(0), StackSlot, DAG.getSrcValue(NULL));
-    std::vector<MVT::ValueType> RTs;
-    RTs.push_back(MVT::f64);
-    RTs.push_back(MVT::Other);
+    SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other,
+                                  DAG.getEntryNode(), Op.getOperand(0),
+                                  StackSlot, DAG.getSrcValue(NULL));
+
+    // Build the FILD: operands are (chain, address, source value type).
+    std::vector<MVT::ValueType> Tys;
+    Tys.push_back(MVT::f64);
+    Tys.push_back(MVT::Other);  // Second result is the token chain.
     std::vector<SDOperand> Ops;
-    Ops.push_back(Store);
+    Ops.push_back(Chain);
     Ops.push_back(StackSlot);
-    return DAG.getNode(X86ISD::FILD64m, RTs, Ops);
+    Ops.push_back(DAG.getValueType(SrcVT));
+    Result = DAG.getNode(X86ISD::FILD, Tys, Ops);
+    return Result;
   }
   case ISD::FP_TO_SINT: {
     assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
@@ -1749,7 +1762,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::SBB:                return "X86ISD::SBB";
   case X86ISD::SHLD:               return "X86ISD::SHLD";
   case X86ISD::SHRD:               return "X86ISD::SHRD";
-  case X86ISD::FILD64m:            return "X86ISD::FILD64m";
+  case X86ISD::FILD:               return "X86ISD::FILD";
   case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
   case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
   case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index a4d481aca31..1deb5f1304d 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -40,16 +40,16 @@ namespace llvm {
       SHLD,
       SHRD,
 
-      /// FILD64m - This instruction implements SINT_TO_FP with a
-      /// 64-bit source in memory and a FP reg result.  This corresponds to
-      /// the X86::FILD64m instruction.  It has two inputs (token chain and
-      /// address) and two outputs (FP value and token chain).
-      FILD64m,
+      /// FILD - This instruction implements SINT_TO_FP with the integer source
+      /// in memory and an FP reg result.  This corresponds to the X86::FILD*m
+      /// instructions.  It has three inputs (token chain, address, and source
+      /// value type) and two outputs (FP value and token chain).
+      FILD,
 
       /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
       /// integer destination in memory and a FP reg source.  This corresponds
       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
-      /// has two inputs (token chain and address) and two outputs (FP value and
+      /// has two inputs (token chain and address) and two outputs (int value and
       /// token chain).
       FP_TO_INT16_IN_MEM,
       FP_TO_INT32_IN_MEM,
diff --git a/lib/Target/X86/X86ISelPattern.cpp b/lib/Target/X86/X86ISelPattern.cpp
index 6ca4078047d..cb1835489c1 100644
--- a/lib/Target/X86/X86ISelPattern.cpp
+++ b/lib/Target/X86/X86ISelPattern.cpp
@@ -2259,7 +2259,7 @@ unsigned ISel::SelectExpr(SDOperand N) {
       addFullAddress(BuildMI(BB, Opc, 4, Result), AM);
     }
     return Result;
-  case X86ISD::FILD64m:
+  case X86ISD::FILD:
     // Make sure we generate both values.
     assert(Result != 1 && N.getValueType() == MVT::f64);
     if (!ExprMap.insert(std::make_pair(N.getValue(1), 1)).second)
@@ -3301,7 +3301,7 @@ void ISel::Select(SDOperand N) {
     SelectExpr(N);
     return;
   case ISD::CopyFromReg:
-  case X86ISD::FILD64m:
+  case X86ISD::FILD:
     ExprMap.erase(N);
     SelectExpr(N.getValue(0));
     return;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 7431a3c1748..12d1333c2ed 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -50,7 +50,8 @@ def SDTX86Fld     : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
                                          SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>;
 def SDTX86Fst     : SDTypeProfile<0, 3, [SDTCisFP<0>,
                                          SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>;
-def SDTX86Fild64m : SDTypeProfile<1, 1, [SDTCisVT<0, f64>, SDTCisPtrTy<1>]>;
+def SDTX86Fild    : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisPtrTy<1>,
+                                         SDTCisVT<2, OtherVT>]>;
 
 def SDTX86RepStr  : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
 
@@ -99,7 +100,7 @@ def X86fld     : SDNode<"X86ISD::FLD",      SDTX86Fld,
                         [SDNPHasChain]>;
 def X86fst     : SDNode<"X86ISD::FST",      SDTX86Fst,
                         [SDNPHasChain]>;
-def X86fild64m : SDNode<"X86ISD::FILD64m",  SDTX86Fild64m,
+def X86fild    : SDNode<"X86ISD::FILD",     SDTX86Fild,
                         [SDNPHasChain]>;
 
 def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
@@ -2707,50 +2708,50 @@ def FDIVR64m : FPI<0xDC, MRM7m, (ops f64mem:$src), "fdivr{l} $src">;
 // FIXME: Implement these when we have a dag-dag isel!
 def FpIADD16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
                     [(set RFP:$dst, (fadd RFP:$src1,
-                                     (sint_to_fp (loadi16 addr:$src2))))]>;
+                                     (X86fild addr:$src2, i16)))]>;
                 // ST(0) = ST(0) + [mem16int]
 def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
                     [(set RFP:$dst, (fadd RFP:$src1,
-                                     (sint_to_fp (loadi32 addr:$src2))))]>;
+                                     (X86fild addr:$src2, i32)))]>;
                 // ST(0) = ST(0) + [mem32int]
 def FpIMUL16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
                     [(set RFP:$dst, (fmul RFP:$src1,
-                                     (sint_to_fp (loadi16 addr:$src2))))]>;
+                                     (X86fild addr:$src2, i16)))]>;
                 // ST(0) = ST(0) * [mem16int]
 def FpIMUL32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
                     [(set RFP:$dst, (fmul RFP:$src1,
-                                     (sint_to_fp (loadi32 addr:$src2))))]>;
+                                     (X86fild addr:$src2, i32)))]>;
                 // ST(0) = ST(0) * [mem32int]
 def FpISUB16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
                     [(set RFP:$dst, (fsub RFP:$src1,
-                                     (sint_to_fp (loadi16 addr:$src2))))]>;
+                                     (X86fild addr:$src2, i16)))]>;
                 // ST(0) = ST(0) - [mem16int]
 def FpISUB32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
                     [(set RFP:$dst, (fsub RFP:$src1,
-                                     (sint_to_fp (loadi32 addr:$src2))))]>;
+                                     (X86fild addr:$src2, i32)))]>;
                 // ST(0) = ST(0) - [mem32int]
 def FpISUBR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
-                     [(set RFP:$dst, (fsub (sint_to_fp (loadi16 addr:$src2)),
+                     [(set RFP:$dst, (fsub (X86fild addr:$src2, i16),
                                       RFP:$src1))]>;
                 // ST(0) = [mem16int] - ST(0)
 def FpISUBR32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
-                     [(set RFP:$dst, (fsub (sint_to_fp (loadi32 addr:$src2)),
+                     [(set RFP:$dst, (fsub (X86fild addr:$src2, i32),
                                       RFP:$src1))]>;
                 // ST(0) = [mem32int] - ST(0)
 def FpIDIV16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
                     [(set RFP:$dst, (fdiv RFP:$src1,
-                                     (sint_to_fp (loadi16 addr:$src2))))]>;
+                                     (X86fild addr:$src2, i16)))]>;
                 // ST(0) = ST(0) / [mem16int]
 def FpIDIV32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
                     [(set RFP:$dst, (fdiv RFP:$src1,
-                                     (sint_to_fp (loadi32 addr:$src2))))]>;
+                                     (X86fild addr:$src2, i32)))]>;
                 // ST(0) = ST(0) / [mem32int]
 def FpIDIVR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
-                     [(set RFP:$dst, (fdiv (sint_to_fp (loadi16 addr:$src2)),
+                     [(set RFP:$dst, (fdiv (X86fild addr:$src2, i16),
                                       RFP:$src1))]>;
                 // ST(0) = [mem16int] / ST(0)
 def FpIDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
-                     [(set RFP:$dst, (fdiv (sint_to_fp (loadi32 addr:$src2)),
+                     [(set RFP:$dst, (fdiv (X86fild addr:$src2, i32),
                                       RFP:$src1))]>;
                 // ST(0) = [mem32int] / ST(0)
 
@@ -2863,11 +2864,11 @@ def FpLD32m  : FpI<(ops RFP:$dst, f32mem:$src), ZeroArgFP,
 def FpLD64m  : FpI<(ops RFP:$dst, f64mem:$src), ZeroArgFP,
                    [(set RFP:$dst, (loadf64 addr:$src))]>;
 def FpILD16m : FpI<(ops RFP:$dst, i16mem:$src), ZeroArgFP,
-                   [(set RFP:$dst, (sint_to_fp (loadi16 addr:$src)))]>;
+                   [(set RFP:$dst, (X86fild addr:$src, i16))]>;
 def FpILD32m : FpI<(ops RFP:$dst, i32mem:$src), ZeroArgFP,
-                   [(set RFP:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
+                   [(set RFP:$dst, (X86fild addr:$src, i32))]>;
 def FpILD64m : FpI<(ops RFP:$dst, i64mem:$src), ZeroArgFP,
-                   [(set RFP:$dst, (X86fild64m addr:$src))]>;
+                   [(set RFP:$dst, (X86fild addr:$src, i64))]>;
 
 def FpST32m   : FpI<(ops f32mem:$op, RFP:$src), OneArgFP,
                 [(truncstore RFP:$src, addr:$op, f32)]>;