diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 1d9ae207b21..cf7fc1faa34 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -935,6 +935,11 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
     assert(StackTop == 0 && "Stack should be empty after a call!");
     pushReg(getFPReg(MI->getOperand(0)));
     break;
+  case X86::FpGETRESULT80x2:
+    assert(StackTop == 0 && "Stack should be empty after a call!");
+    pushReg(getFPReg(MI->getOperand(0)));
+    pushReg(getFPReg(MI->getOperand(1)));
+    break;
   case X86::FpSETRESULT32:
   case X86::FpSETRESULT64:
   case X86::FpSETRESULT80:
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 53cd6121d95..c51cd80cb9f 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1152,6 +1152,26 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
     case X86ISD::GlobalBaseReg:
       return getGlobalBaseReg();
 
+    case X86ISD::FP_GET_RESULT2: {
+      SDOperand Chain = N.getOperand(0);
+      SDOperand InFlag = N.getOperand(1);
+      AddToISelQueue(Chain);
+      AddToISelQueue(InFlag);
+      std::vector<MVT::ValueType> Tys;
+      Tys.push_back(MVT::f80);
+      Tys.push_back(MVT::f80);
+      Tys.push_back(MVT::Other);
+      Tys.push_back(MVT::Flag);
+      SDOperand Ops[] = { Chain, InFlag };
+      SDNode *ResNode = CurDAG->getTargetNode(X86::FpGETRESULT80x2, Tys,
+                                              Ops, 2);
+      Chain = SDOperand(ResNode, 2);
+      InFlag = SDOperand(ResNode, 3);
+      ReplaceUses(SDOperand(N.Val, 2), Chain);
+      ReplaceUses(SDOperand(N.Val, 3), InFlag);
+      return ResNode;
+    }
+
     case ISD::ADD: {
       // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
       // code and is matched first so to prevent it from being turned into
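
Note on the X86ISelDAGToDAG.cpp hunk: FP_GET_RESULT2 is selected by hand, presumably because FpGETRESULT80x2 carries no tblgen pattern (its pattern list in the X86InstrFPStack.td hunk below is empty) and tblgen at this point cannot match nodes producing two register results. The selected node's results follow the order of the Tys list built above; a minimal sketch of unpacking them (the variable names are illustrative, not from the patch):

    SDOperand Val0(ResNode, 0);      // first f80 result
    SDOperand Val1(ResNode, 1);      // second f80 result
    SDOperand OutChain(ResNode, 2);  // token chain
    SDOperand OutFlag(ResNode, 3);   // output flag for later glued users
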
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 229810d0ffd..df7ebeb90ca 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -907,6 +907,44 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                     &ResultVals[0], ResultVals.size()).Val;
 }
 
+/// LowerCallResultToTwo64BitRegs - Lower the result values of an x86-64
+/// ISD::CALL where the results are known to be in two 64-bit registers,
+/// e.g. XMM0 and XMM1. This simply stores the two values back to the
+/// fixed stack slot allocated for the StructRet.
+SDNode *X86TargetLowering::
+LowerCallResultToTwo64BitRegs(SDOperand Chain, SDOperand InFlag,
+                              SDNode *TheCall, unsigned Reg1, unsigned Reg2,
+                              MVT::ValueType VT, SelectionDAG &DAG) {
+  SDOperand RetVal1 = DAG.getCopyFromReg(Chain, Reg1, VT, InFlag);
+  Chain = RetVal1.getValue(1);
+  InFlag = RetVal1.getValue(2);
+  SDOperand RetVal2 = DAG.getCopyFromReg(Chain, Reg2, VT, InFlag);
+  Chain = RetVal2.getValue(1);
+  InFlag = RetVal2.getValue(2);
+  SDOperand FIN = TheCall->getOperand(5);
+  Chain = DAG.getStore(Chain, RetVal1, FIN, NULL, 0);
+  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8));
+  Chain = DAG.getStore(Chain, RetVal2, FIN, NULL, 0);
+  return Chain.Val;
+}
+
+/// LowerCallResultToTwoX87Regs - Lower the result values of an x86-64
+/// ISD::CALL where the results are known to be in ST0 and ST1.
+SDNode *X86TargetLowering::
+LowerCallResultToTwoX87Regs(SDOperand Chain, SDOperand InFlag,
+                            SDNode *TheCall, SelectionDAG &DAG) {
+  SmallVector<SDOperand, 8> ResultVals;
+  const MVT::ValueType VTs[] = { MVT::f80, MVT::f80, MVT::Other, MVT::Flag };
+  SDVTList Tys = DAG.getVTList(VTs, 4);
+  SDOperand Ops[] = { Chain, InFlag };
+  SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT2, Tys, Ops, 2);
+  Chain = RetVal.getValue(2);
+  SDOperand FIN = TheCall->getOperand(5);
+  Chain = DAG.getStore(Chain, RetVal.getValue(1), FIN, NULL, 0);
+  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(16));
+  Chain = DAG.getStore(Chain, RetVal, FIN, NULL, 0);
+  return Chain.Val;
+}
 
 //===----------------------------------------------------------------------===//
 //                C & StdCall & Fast Calling Convention implementation
@@ -1253,6 +1291,64 @@ X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
   return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
 }
 
+/// ClassifyX86_64SRetCallReturn - Classify how to implement an x86-64
+/// struct return call to the specified function. The x86-64 ABI specifies
+/// that some SRet calls are actually returned in registers. Since current
+/// LLVM cannot represent multi-value calls, they are represented as calls
+/// where the results are passed in a hidden struct provided by the caller.
+/// This function examines the type of the struct to determine the correct
+/// way to implement the call.
+X86::X86_64SRet
+X86TargetLowering::ClassifyX86_64SRetCallReturn(const Function *Fn) {
+  // FIXME: Disabled for now.
+  return X86::InMemory;
+
+  const PointerType *PTy = cast<PointerType>(Fn->arg_begin()->getType());
+  const Type *RTy = PTy->getElementType();
+  unsigned Size = getTargetData()->getABITypeSize(RTy);
+  if (Size != 16 && Size != 32)
+    return X86::InMemory;
+
+  if (Size == 32) {
+    const StructType *STy = dyn_cast<StructType>(RTy);
+    if (!STy) return X86::InMemory;
+    if (STy->getNumElements() == 2 &&
+        STy->getElementType(0) == Type::X86_FP80Ty &&
+        STy->getElementType(1) == Type::X86_FP80Ty)
+      return X86::InX87;
+  }
+
+  bool AllFP = true;
+  for (Type::subtype_iterator I = RTy->subtype_begin(), E = RTy->subtype_end();
+       I != E; ++I) {
+    const Type *STy = I->get();
+    if (!STy->isFPOrFPVector()) {
+      AllFP = false;
+      break;
+    }
+  }
+
+  if (AllFP)
+    return X86::InSSE;
+  return X86::InGPR64;
+}
+
+void X86TargetLowering::X86_64AnalyzeSRetCallOperands(SDNode *TheCall,
+                                                      CCAssignFn *Fn,
+                                                      CCState &CCInfo) {
+  unsigned NumOps = (TheCall->getNumOperands() - 5) / 2;
+  for (unsigned i = 1; i != NumOps; ++i) {
+    MVT::ValueType ArgVT = TheCall->getOperand(5+2*i).getValueType();
+    SDOperand FlagOp = TheCall->getOperand(5+2*i+1);
+    unsigned ArgFlags = cast<ConstantSDNode>(FlagOp)->getValue();
+    if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) {
+      cerr << "Call operand #" << i << " has unhandled type "
+           << MVT::getValueTypeString(ArgVT) << "\n";
+      abort();
+    }
+  }
+}
+
 SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
   MachineFunction &MF = DAG.getMachineFunction();
   SDOperand Chain     = Op.getOperand(0);
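
For reference, the classification above (once the FIXME is lifted) maps source-level aggregates as follows. This is a sketch assuming an x86-64 target, where x86_fp80 occupies a 16-byte slot, so a pair of them has getABITypeSize() == 32:

    // Illustrative C++ shapes, not from the patch:
    struct GPRPair { long a, b;          };  // 16 bytes, not all FP    -> X86::InGPR64
    struct SSEPair { double re, im;      };  // 16 bytes, all FP        -> X86::InSSE
    struct X87Pair { long double re, im; };  // 32 bytes, two x86_fp80  -> X86::InX87
    struct Other   { long a, b, c;       };  // any other size          -> X86::InMemory
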
@@ -1262,6 +1358,7 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
                         && CC == CallingConv::Fast && PerformTailCallOpt;
   SDOperand Callee    = Op.getOperand(4);
   bool Is64Bit        = Subtarget->is64Bit();
+  bool IsStructRet    = CallIsStructReturn(Op);
 
   assert(!(isVarArg && CC == CallingConv::Fast) &&
          "Var args not supported with calling convention fastcc");
@@ -1269,7 +1366,24 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
-  CCInfo.AnalyzeCallOperands(Op.Val, CCAssignFnForNode(Op));
+  CCAssignFn *CCFn = CCAssignFnForNode(Op);
+
+  X86::X86_64SRet SRetMethod = X86::InMemory;
+  if (Is64Bit && IsStructRet)
+    // FIXME: We can't figure out the type of the sret structure for indirect
+    // calls. We need to copy more information from CallSite to the ISD::CALL
+    // node.
+    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+      SRetMethod =
+        ClassifyX86_64SRetCallReturn(dyn_cast<Function>(G->getGlobal()));
+
+  // UGLY HACK! For x86-64, some 128-bit aggregates are returned in a pair of
+  // registers. Unfortunately, llvm does not support i128 yet so we pretend
+  // it's an sret call.
+  if (SRetMethod != X86::InMemory)
+    X86_64AnalyzeSRetCallOperands(Op.Val, CCFn, CCInfo);
+  else
+    CCInfo.AnalyzeCallOperands(Op.Val, CCFn);
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -1540,7 +1654,7 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
   unsigned NumBytesForCalleeToPush;
   if (IsCalleePop(Op))
     NumBytesForCalleeToPush = NumBytes;    // Callee pops everything
-  else if (!Is64Bit && CallIsStructReturn(Op))
+  else if (!Is64Bit && IsStructRet)
     // If this is is a call to a struct-return function, the callee
     // pops the hidden struct pointer, so we have to push it back.
     // This is common for Darwin/X86, Linux & Mingw32 targets.
@@ -1557,7 +1671,21 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
 
   // Handle result values, copying them out of physregs into vregs that we
   // return.
-  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
+  switch (SRetMethod) {
+  default:
+    return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
+  case X86::InGPR64:
+    return SDOperand(LowerCallResultToTwo64BitRegs(Chain, InFlag, Op.Val,
+                                                   X86::RAX, X86::RDX,
+                                                   MVT::i64, DAG), Op.ResNo);
+  case X86::InSSE:
+    return SDOperand(LowerCallResultToTwo64BitRegs(Chain, InFlag, Op.Val,
+                                                   X86::XMM0, X86::XMM1,
+                                                   MVT::f64, DAG), Op.ResNo);
+  case X86::InX87:
+    return SDOperand(LowerCallResultToTwoX87Regs(Chain, InFlag, Op.Val, DAG),
+                     Op.ResNo);
+  }
 }
 
@@ -5114,6 +5242,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FLD:                return "X86ISD::FLD";
   case X86ISD::FST:                return "X86ISD::FST";
   case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
+  case X86ISD::FP_GET_RESULT2:     return "X86ISD::FP_GET_RESULT2";
   case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
   case X86ISD::CALL:               return "X86ISD::CALL";
   case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
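
Two details of the result handling above are worth spelling out. First, the InSSE path copies XMM0/XMM1 as MVT::f64, presumably because each return register simply carries 64 bits; the struct's actual field types no longer matter at this point. Second, the store offsets (8 for the GPR/SSE pairs, 16 for the x87 pair) match the field offsets of the returned aggregates. A small host-side check of that layout assumption, valid on an x86-64 host where long double is the 16-byte x86_fp80 slot:

    #include <cstddef>

    struct SSEPair { double a, b; };       // second field at offset 8
    struct X87Pair { long double a, b; };  // second field at offset 16

    static_assert(offsetof(SSEPair, b) == 8,  "matches getIntPtrConstant(8)");
    static_assert(offsetof(X87Pair, b) == 16, "matches getIntPtrConstant(16)");

(Which of the two FP_GET_RESULT2 results corresponds to ST(0) is pinned down by the stackifier case added in X86FloatingPoint.cpp, not here.)
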
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index f407dfb5b1e..b2b46469406 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -89,6 +89,10 @@ namespace llvm {
       /// writes a RFP result and a chain.
       FP_GET_RESULT,
 
+      /// FP_GET_RESULT2 - Same as FP_GET_RESULT except it copies two values,
+      /// ST(0) and ST(1).
+      FP_GET_RESULT2,
+
       /// FP_SET_RESULT - This corresponds to FpSETRESULT pseudo instruction
       /// which copies the source operand to ST(0). It takes a chain+value and
       /// returns a chain and a flag.
@@ -201,96 +205,107 @@ namespace llvm {
   };
  }
 
-  /// Define some predicates that are used for node matching.
-  namespace X86 {
-   /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
-   /// specifies a shuffle of elements that is suitable for input to PSHUFD.
-   bool isPSHUFDMask(SDNode *N);
+  /// Define some predicates that are used for node matching.
+  namespace X86 {
+   /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to PSHUFD.
+   bool isPSHUFDMask(SDNode *N);
 
-   /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
-   /// specifies a shuffle of elements that is suitable for input to PSHUFD.
-   bool isPSHUFHWMask(SDNode *N);
+   /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to PSHUFD.
+   bool isPSHUFHWMask(SDNode *N);
 
-   /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
-   /// specifies a shuffle of elements that is suitable for input to PSHUFD.
-   bool isPSHUFLWMask(SDNode *N);
+   /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to PSHUFD.
+   bool isPSHUFLWMask(SDNode *N);
 
-   /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
-   /// specifies a shuffle of elements that is suitable for input to SHUFP*.
-   bool isSHUFPMask(SDNode *N);
+   /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to SHUFP*.
+   bool isSHUFPMask(SDNode *N);
 
-   /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
-   /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
-   bool isMOVHLPSMask(SDNode *N);
+   /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+   bool isMOVHLPSMask(SDNode *N);
 
-   /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
-   /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
-   /// <2, 3, 2, 3>
-   bool isMOVHLPS_v_undef_Mask(SDNode *N);
+   /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+   /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+   /// <2, 3, 2, 3>
+   bool isMOVHLPS_v_undef_Mask(SDNode *N);
 
-   /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
-   /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
-   bool isMOVLPMask(SDNode *N);
+   /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
+   bool isMOVLPMask(SDNode *N);
 
-   /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
-   /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
-   /// as well as MOVLHPS.
-   bool isMOVHPMask(SDNode *N);
+   /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
+   /// as well as MOVLHPS.
+   bool isMOVHPMask(SDNode *N);
 
-   /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
-   /// specifies a shuffle of elements that is suitable for input to UNPCKL.
-   bool isUNPCKLMask(SDNode *N, bool V2IsSplat = false);
+   /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to UNPCKL.
+   bool isUNPCKLMask(SDNode *N, bool V2IsSplat = false);
 
-   /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
-   /// specifies a shuffle of elements that is suitable for input to UNPCKH.
-   bool isUNPCKHMask(SDNode *N, bool V2IsSplat = false);
+   /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to UNPCKH.
+   bool isUNPCKHMask(SDNode *N, bool V2IsSplat = false);
 
-   /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
-   /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
-   /// <0, 0, 1, 1>
-   bool isUNPCKL_v_undef_Mask(SDNode *N);
+   /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+   /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+   /// <0, 0, 1, 1>
+   bool isUNPCKL_v_undef_Mask(SDNode *N);
 
-   /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
-   /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
-   /// <2, 2, 3, 3>
-   bool isUNPCKH_v_undef_Mask(SDNode *N);
+   /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
+   /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
+   /// <2, 2, 3, 3>
+   bool isUNPCKH_v_undef_Mask(SDNode *N);
 
-   /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
-   /// specifies a shuffle of elements that is suitable for input to MOVSS,
-   /// MOVSD, and MOVD, i.e. setting the lowest element.
-   bool isMOVLMask(SDNode *N);
+   /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to MOVSS,
+   /// MOVSD, and MOVD, i.e. setting the lowest element.
+   bool isMOVLMask(SDNode *N);
 
-   /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
-   /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
-   bool isMOVSHDUPMask(SDNode *N);
+   /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+   bool isMOVSHDUPMask(SDNode *N);
 
-   /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
-   /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
-   bool isMOVSLDUPMask(SDNode *N);
+   /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
+   bool isMOVSLDUPMask(SDNode *N);
 
-   /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
-   /// specifies a splat of a single element.
-   bool isSplatMask(SDNode *N);
+   /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a splat of a single element.
+   bool isSplatMask(SDNode *N);
 
-   /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
-   /// specifies a splat of zero element.
-   bool isSplatLoMask(SDNode *N);
+   /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a splat of zero element.
+   bool isSplatLoMask(SDNode *N);
 
-   /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
-   /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
-   /// instructions.
-   unsigned getShuffleSHUFImmediate(SDNode *N);
+   /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
+   /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
+   /// instructions.
+   unsigned getShuffleSHUFImmediate(SDNode *N);
 
-   /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
-   /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
-   /// instructions.
-   unsigned getShufflePSHUFHWImmediate(SDNode *N);
+   /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+   /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
+   /// instructions.
+   unsigned getShufflePSHUFHWImmediate(SDNode *N);
 
-   /// getShufflePSHUFKWImmediate - Return the appropriate immediate to shuffle
-   /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
-   /// instructions.
-   unsigned getShufflePSHUFLWImmediate(SDNode *N);
-  }
+   /// getShufflePSHUFKWImmediate - Return the appropriate immediate to shuffle
+   /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
+   /// instructions.
+   unsigned getShufflePSHUFLWImmediate(SDNode *N);
+  }
+
+  namespace X86 {
+   /// X86_64SRet - These represent different ways to implement x86-64
+   /// struct-return call results.
+   enum X86_64SRet {
+     InMemory,  // Really is sret, returns in memory.
+     InGPR64,   // Returns in a pair of 64-bit integer registers.
+     InSSE,     // Returns in a pair of SSE registers.
+     InX87      // Returns in a pair of f80 X87 registers.
+   };
+  }
 
 //===--------------------------------------------------------------------===//
 //  X86TargetLowering - X86 Implementation of the TargetLowering interface
@@ -441,10 +456,21 @@ namespace llvm {
     /// When SSE2 is available, use it for f64 operations.
     bool X86ScalarSSEf32;
     bool X86ScalarSSEf64;
-    
+
+    X86::X86_64SRet ClassifyX86_64SRetCallReturn(const Function *Fn);
+
+    void X86_64AnalyzeSRetCallOperands(SDNode*, CCAssignFn*, CCState&);
+
     SDNode *LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode*TheCall,
                             unsigned CallingConv, SelectionDAG &DAG);
-    
+
+    SDNode *LowerCallResultToTwo64BitRegs(SDOperand Chain, SDOperand InFlag,
+                                          SDNode *TheCall, unsigned Reg1,
+                                          unsigned Reg2, MVT::ValueType VT,
+                                          SelectionDAG &DAG);
+
+    SDNode *LowerCallResultToTwoX87Regs(SDOperand Chain, SDOperand InFlag,
+                                        SDNode *TheCall, SelectionDAG &DAG);
 
     SDOperand LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
                                const CCValAssign &VA,  MachineFrameInfo *MFI,
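
Most of the @@ -201 hunk is apparently whitespace-only churn: each X86::is*Mask block is removed and re-added with its text unchanged. The substantive header additions are FP_GET_RESULT2, the three new method declarations, and the X86_64SRet enum, which is placed in a second namespace X86 block. Reopening the namespace keeps the enum next to the calling-convention code it describes while merging it with the existing X86 helpers; a minimal illustration of the language rule relied on:

    namespace X86 { enum Color { Red }; }  // first block
    namespace X86 { enum Shape { Dot }; }  // reopened: both members coexist
    X86::Color c = X86::Red;
    X86::Shape s = X86::Dot;
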
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index cdd455edbf2..a55e73130c7 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -90,7 +90,7 @@ def IMPLICIT_DEF_GR64 : I<0, Pseudo, (outs GR64:$dst), (ins),
 let isCall = 1 in
   // All calls clobber the non-callee saved registers...
   let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
               MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
               XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
               XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS] in {
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 98c650e199a..2110f75edc2 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -18,6 +18,7 @@
 //===----------------------------------------------------------------------===//
 
 def SDTX86FpGet     : SDTypeProfile<1, 0, [SDTCisFP<0>]>;
+def SDTX86FpGet2    : SDTypeProfile<2, 0, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
 def SDTX86FpSet     : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
 def SDTX86Fld       : SDTypeProfile<1, 2, [SDTCisFP<0>,
                                            SDTCisPtrTy<1>,
@@ -33,6 +34,8 @@ def SDTX86CwdStore  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
 def X86fpget        : SDNode<"X86ISD::FP_GET_RESULT", SDTX86FpGet,
                              [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+def X86fpget2       : SDNode<"X86ISD::FP_GET_RESULT2", SDTX86FpGet2,
+                             [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
 def X86fpset        : SDNode<"X86ISD::FP_SET_RESULT", SDTX86FpSet,
                              [SDNPHasChain, SDNPOutFlag]>;
 def X86fld          : SDNode<"X86ISD::FLD", SDTX86Fld,
@@ -145,6 +148,10 @@ def FpGETRESULT64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP,
 def FpGETRESULT80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP,
                          [(set RFP80:$dst, X86fpget)]>;  // FPR = ST(0)
 
+def FpGETRESULT80x2 : FpI_<(outs RFP80:$dst1, RFP80:$dst2), (ins), SpecialFP,
+                           []>;  // FPR = ST(0), FPR = ST(1)
+
+
 let Defs = [ST0] in {
 def FpSETRESULT32 : FpI_<(outs), (ins RFP32:$src), SpecialFP,
                          [(X86fpset RFP32:$src)]>;// ST(0) = FPR
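
Two notes on the TableGen changes. SDTX86FpGet2's profile, SDTypeProfile<2, 0, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>, declares a node with two results and no operands, where result 0 must be floating point and result 1 must have the same type; FpGETRESULT80x2's pattern list is deliberately empty because the node is selected by hand in the X86ISelDAGToDAG.cpp hunk above. Adding ST1 to the 64-bit call Defs list models the fact that a call can now define, and therefore clobber, both ST(0) and ST(1). The source-level case this targets is presumably _Complex long double, which the x86-64 psABI returns with the real part in %st0 and the imaginary part in %st1, and which frontends of this era lowered to the { x86_fp80, x86_fp80 } sret struct that ClassifyX86_64SRetCallReturn tests for. A hypothetical caller, using the GNU _Complex extension (compiles as GNU C or C++):

    _Complex long double getz(void);    // result comes back in ST(0)/ST(1)

    long double realpart(void) {
      _Complex long double z = getz();  // the call defines both ST0 and ST1
      return __real__ z;                // GNU extension reading the real part
    }
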