An x86 function returns a floating point value in st(0), and we must make sure

it is popped, even if it is ununsed. A CopyFromReg node is too weak to represent
the required sideeffect, so insert an FpGET_ST0 instruction directly instead.

This will matter when CopyFromReg gets lowered to a generic COPY instruction.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108037 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jakob Stoklund Olesen 2010-07-10 04:04:25 +00:00
parent 7a029b6d7e
commit d737fcafc4

View File

@ -1347,17 +1347,34 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
report_fatal_error("SSE register return with SSE disabled"); report_fatal_error("SSE register return with SSE disabled");
} }
// If this is a call to a function that returns an fp value on the floating
// point stack, but where we prefer to use the value in xmm registers, copy
// it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
if ((VA.getLocReg() == X86::ST0 ||
VA.getLocReg() == X86::ST1) &&
isScalarFPTypeInSSEReg(VA.getValVT())) {
CopyVT = MVT::f80;
}
SDValue Val; SDValue Val;
if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
// If this is a call to a function that returns an fp value on the floating
// point stack, we must guarantee the the value is popped from the stack, so
// a CopyFromReg is not good enough - the copy instruction may be eliminated
// if the return value is not used. We use the FpGET_ST0 instructions
// instead.
if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
// If we prefer to use the value in xmm registers, copy it out as f80 and
// use a truncate to move it from fp stack reg to xmm reg.
if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
bool isST0 = VA.getLocReg() == X86::ST0;
unsigned Opc = 0;
if (CopyVT == MVT::f32) Opc = isST0 ? X86::FpGET_ST0_32:X86::FpGET_ST1_32;
if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64;
if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80;
SDValue Ops[] = { Chain, InFlag };
Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Flag,
Ops, 2), 1);
Val = Chain.getValue(0);
// Round the f80 to the right size, which also moves it to the appropriate
// xmm register.
if (CopyVT != VA.getValVT())
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
// This truncation won't change the value.
DAG.getIntPtrConstant(1));
} else if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
// For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64. // For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64.
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
@ -1377,15 +1394,6 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
Val = Chain.getValue(0); Val = Chain.getValue(0);
} }
InFlag = Chain.getValue(2); InFlag = Chain.getValue(2);
if (CopyVT != VA.getValVT()) {
// Round the F80 the right size, which also moves to the appropriate xmm
// register.
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
// This truncation won't change the value.
DAG.getIntPtrConstant(1));
}
InVals.push_back(Val); InVals.push_back(Val);
} }