diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 769e42d7783..8539cbf9640 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -446,24 +446,96 @@ static void GetRetValueLocs(const MVT::ValueType *VTs, unsigned NumVTs,
   // Otherwise, NumVTs is 1.
   MVT::ValueType ArgVT = VTs[0];
 
-  if (MVT::isVector(ArgVT))      // Integer or FP vector result -> XMM0.
-    ResultRegs[0] = X86::XMM0;
-  else if (MVT::isFloatingPoint(ArgVT) && Subtarget->is64Bit())
-    // FP values in X86-64 go in XMM0.
-    ResultRegs[0] = X86::XMM0;
-  else if (MVT::isFloatingPoint(ArgVT))
-    // FP values in X86-32 go in ST0.
-    ResultRegs[0] = X86::ST0;
-  else {
-    assert(MVT::isInteger(ArgVT) && "Unknown return value type!");
-
-    // Integer result -> EAX / RAX.
-    // The C calling convention guarantees the return value has been
-    // promoted to at least MVT::i32. The X86-64 ABI doesn't require the
-    // value to be promoted MVT::i64. So we don't have to extend it to
-    // 64-bit.
-    ResultRegs[0] = (ArgVT == MVT::i64) ? X86::RAX : X86::EAX;
+  unsigned Reg;
+  switch (ArgVT) {
+  case MVT::i8:  Reg = X86::AL;  break;
+  case MVT::i16: Reg = X86::AX;  break;
+  case MVT::i32: Reg = X86::EAX; break;
+  case MVT::i64: Reg = X86::RAX; break;
+  case MVT::f32:
+  case MVT::f64:
+    if (Subtarget->is64Bit())
+      Reg = X86::XMM0;        // FP values in X86-64 go in XMM0.
+    else
+      Reg = X86::ST0;         // FP values in X86-32 go in ST0.
+    break;
+  default:
+    assert(MVT::isVector(ArgVT) && "Unknown return value type!");
+    Reg = X86::XMM0;          // Int/FP vector result -> XMM0.
+    break;
   }
+  ResultRegs[0] = Reg;
+}
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. This returns an SDNode with the same number of values as the
+/// ISD::CALL.
+SDNode *X86TargetLowering::
+LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
+                unsigned CallingConv, SelectionDAG &DAG) {
+  SmallVector<SDOperand, 8> ResultVals;
+
+  // We support returning up to two registers.
+  MVT::ValueType VTs[2];
+  unsigned DestRegs[2];
+  unsigned NumRegs = TheCall->getNumValues() - 1;
+  assert(NumRegs <= 2 && "Can only return up to two regs!");
+
+  for (unsigned i = 0; i != NumRegs; ++i)
+    VTs[i] = TheCall->getValueType(i);
+
+  // Determine which register each value should be copied into.
+  GetRetValueLocs(VTs, NumRegs, DestRegs, Subtarget, CallingConv);
+
+  // Copy all of the result registers out of their specified physreg.
+  if (NumRegs != 1 || DestRegs[0] != X86::ST0) {
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      Chain = DAG.getCopyFromReg(Chain, DestRegs[i], VTs[i],
+                                 InFlag).getValue(1);
+      InFlag = Chain.getValue(2);
+      ResultVals.push_back(Chain.getValue(0));
+    }
+  } else {
+    // Copies from the FP stack are special, as ST0 isn't a valid register
+    // before the fp stackifier runs.
+
+    // Copy ST0 into an RFP register with FP_GET_RESULT.
+    SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
+    SDOperand GROps[] = { Chain, InFlag };
+    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
+    Chain = RetVal.getValue(1);
+    InFlag = RetVal.getValue(2);
+
+    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
+    // an XMM register.
+    if (X86ScalarSSE) {
+      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
+      // shouldn't be necessary except that RFP cannot be live across
+      // multiple blocks. When stackifier is fixed, they can be uncoupled.
+      MachineFunction &MF = DAG.getMachineFunction();
+      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+      SDOperand Ops[] = {
+        Chain, RetVal, StackSlot, DAG.getValueType(VTs[0]), InFlag
+      };
+      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
+      RetVal = DAG.getLoad(VTs[0], Chain, StackSlot, NULL, 0);
+      Chain = RetVal.getValue(1);
+    }
+
+    if (VTs[0] == MVT::f32 && !X86ScalarSSE)
+      // FIXME: we would really like to remember that this FP_ROUND
+      // operation is okay to eliminate if we allow excess FP precision.
+      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
+    ResultVals.push_back(RetVal);
+  }
+
+  // Merge everything together with a MERGE_VALUES node.
+  ResultVals.push_back(Chain);
+  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
+                     &ResultVals[0], ResultVals.size()).Val;
 }
@@ -696,7 +768,6 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
   bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
   SDOperand Callee = Op.getOperand(4);
-  MVT::ValueType RetVT= Op.Val->getValueType(0);
   unsigned NumOps = (Op.getNumOperands() - 5) / 2;
 
   static const unsigned XMMArgRegs[] = {
@@ -905,88 +976,12 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
   Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
   Ops.push_back(InFlag);
   Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
-  if (RetVT != MVT::Other)
-    InFlag = Chain.getValue(1);
+  InFlag = Chain.getValue(1);
 
-  SmallVector<SDOperand, 8> ResultVals;
-  switch (RetVT) {
-  default: assert(0 && "Unknown value type to return!");
-  case MVT::Other:
-    NodeTys = DAG.getVTList(MVT::Other);
-    break;
-  case MVT::i8:
-    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
-    ResultVals.push_back(Chain.getValue(0));
-    NodeTys = DAG.getVTList(MVT::i8, MVT::Other);
-    break;
-  case MVT::i16:
-    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
-    ResultVals.push_back(Chain.getValue(0));
-    NodeTys = DAG.getVTList(MVT::i16, MVT::Other);
-    break;
-  case MVT::i32:
-    if (Op.Val->getValueType(1) == MVT::i32) {
-      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
-      ResultVals.push_back(Chain.getValue(0));
-      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
-                                 Chain.getValue(2)).getValue(1);
-      ResultVals.push_back(Chain.getValue(0));
-      NodeTys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
-    } else {
-      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
-      ResultVals.push_back(Chain.getValue(0));
-      NodeTys = DAG.getVTList(MVT::i32, MVT::Other);
-    }
-    break;
-  case MVT::v16i8:
-  case MVT::v8i16:
-  case MVT::v4i32:
-  case MVT::v2i64:
-  case MVT::v4f32:
-  case MVT::v2f64:
-    assert(!isStdCall && "Unknown value type to return!");
-    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
-    ResultVals.push_back(Chain.getValue(0));
-    NodeTys = DAG.getVTList(RetVT, MVT::Other);
-    break;
-  case MVT::f32:
-  case MVT::f64: {
-    SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
-    SDOperand GROps[] = { Chain, InFlag };
-    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
-    Chain = RetVal.getValue(1);
-    InFlag = RetVal.getValue(2);
-    if (X86ScalarSSE) {
-      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
-      // shouldn't be necessary except that RFP cannot be live across
-      // multiple blocks. When stackifier is fixed, they can be uncoupled.
-      MachineFunction &MF = DAG.getMachineFunction();
-      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
-      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
-      Tys = DAG.getVTList(MVT::Other);
-      SDOperand Ops[] = {
-        Chain, RetVal, StackSlot, DAG.getValueType(RetVT), InFlag
-      };
-      Chain = DAG.getNode(X86ISD::FST, Tys, Ops, 5);
-      RetVal = DAG.getLoad(RetVT, Chain, StackSlot, NULL, 0);
-      Chain = RetVal.getValue(1);
-    }
-
-    if (RetVT == MVT::f32 && !X86ScalarSSE)
-      // FIXME: we would really like to remember that this FP_ROUND
-      // operation is okay to eliminate if we allow excess FP precision.
-      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
-    ResultVals.push_back(RetVal);
-    NodeTys = DAG.getVTList(RetVT, MVT::Other);
-    break;
-  }
-  }
-
-  // Merge everything together with a MERGE_VALUES node.
-  ResultVals.push_back(Chain);
-  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
-                              &ResultVals[0], ResultVals.size());
-  return Res.getValue(Op.ResNo);
+  // Handle result values, copying them out of physregs into vregs that we
+  // return.
+  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CallingConv::C, DAG),
+                   Op.ResNo);
 }
@@ -3946,9 +3941,8 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
   default:
     assert(0 && "Unsupported calling convention");
   case CallingConv::Fast:
-    if (EnableFastCC) {
+    if (EnableFastCC)
       return LowerFastCCCallTo(Op, DAG);
-    }
     // Falls through
   case CallingConv::C:
     return LowerCCCCallTo(Op, DAG);
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index f520ff813f4..7ebdc79af7a 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -360,6 +360,9 @@ namespace llvm {
     /// X86ScalarSSE - Select between SSE2 or x87 floating point ops.
     bool X86ScalarSSE;
 
+    SDNode *LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
+                            unsigned CallingConv, SelectionDAG &DAG);
+
     // C and StdCall Calling Convention implementation.
    SDOperand LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
                                bool isStdCall = false);
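
A minimal sketch of how the new helper's results are meant to be consumed
(illustrative only, not part of the patch; the Lo/Hi/OutChain names and the
two-i32 call shape are assumptions made for the example):

  // Assume TheCall is an ISD::CALL node producing two i32 results plus a
  // chain; per the old i32/i32 path these are returned in EAX and EDX.
  SDNode *Res = LowerCallResult(Chain, InFlag, TheCall, CallingConv::C, DAG);
  SDOperand Lo       = SDOperand(Res, 0);  // first result, copied out of EAX
  SDOperand Hi       = SDOperand(Res, 1);  // second result, copied out of EDX
  SDOperand OutChain = SDOperand(Res, 2);  // chain from the MERGE_VALUES node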