From 2d2970905cf745771d9c4f23293ca3de6659ab4f Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Tue, 23 May 2006 18:50:38 +0000 Subject: [PATCH] Implement an annoying part of the Darwin/X86 ABI: the callee of a struct return argument pops the hidden struct pointer if present, not the caller. For example, in this testcase: struct X { int D, E, F, G; }; struct X bar() { struct X a; a.D = 0; a.E = 1; a.F = 2; a.G = 3; return a; } void foo(struct X *P) { *P = bar(); } We used to emit: _foo: subl $28, %esp movl 32(%esp), %eax movl %eax, (%esp) call _bar addl $28, %esp ret _bar: movl 4(%esp), %eax movl $0, (%eax) movl $1, 4(%eax) movl $2, 8(%eax) movl $3, 12(%eax) ret This is correct on Linux/X86 but not Darwin/X86. With this patch, we now emit: _foo: subl $28, %esp movl 32(%esp), %eax movl %eax, (%esp) call _bar *** addl $24, %esp ret _bar: movl 4(%esp), %eax movl $0, (%eax) movl $1, 4(%eax) movl $2, 8(%eax) movl $3, 12(%eax) *** ret $4 For the record, GCC emits (which is functionally equivalent to our new code): _bar: movl 4(%esp), %eax movl $3, 12(%eax) movl $2, 8(%eax) movl $1, 4(%eax) movl $0, (%eax) ret $4 _foo: pushl %esi subl $40, %esp movl 48(%esp), %esi leal 16(%esp), %eax movl %eax, (%esp) call _bar subl $4, %esp movl 16(%esp), %eax movl %eax, (%esi) movl 20(%esp), %eax movl %eax, 4(%esi) movl 24(%esp), %eax movl %eax, 8(%esi) movl 28(%esp), %eax movl %eax, 12(%esi) addl $40, %esp popl %esi ret This fixes SingleSource/Benchmarks/CoyoteBench/fftbench with LLC and the JIT, and fixes the X86-backend portion of PR729. The CBE still needs to be updated. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@28438 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++++++++++-- lib/Target/X86/X86ISelLowering.h | 2 +- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 78600804f68..1317510740b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -393,7 +393,8 @@ X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy, if (CallingConv == CallingConv::Fast && EnableFastCC) return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG); - return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG); + return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, CallingConv, + Callee, Args, DAG); } //===----------------------------------------------------------------------===// @@ -520,6 +521,12 @@ void X86TargetLowering::PreprocessCCCArguments(std::vector &Args, ReturnAddrIndex = 0; // No return address slot generated yet. BytesToPopOnReturn = 0; // Callee pops nothing. BytesCallerReserves = ArgOffset; + + // If this is a struct return on Darwin/X86, the callee pops the hidden struct + // pointer. + if (F.getCallingConv() == CallingConv::CSRet && + Subtarget->isTargetDarwin()) + BytesToPopOnReturn = 4; } void X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) { @@ -551,6 +558,7 @@ void X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) { std::pair X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy, bool isVarArg, bool isTailCall, + unsigned CallingConv, SDOperand Callee, ArgListTy &Args, SelectionDAG &DAG) { // Count how many bytes are to be pushed on the stack. @@ -704,13 +712,21 @@ X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy, Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops); InFlag = Chain.getValue(1); + // Create the CALLSEQ_END node. 
+ unsigned NumBytesForCalleeToPush = 0; + + // If this is a call to a struct-return function on Darwin/X86, the callee + // pops the hidden struct pointer, so we have to push it back. + if (CallingConv == CallingConv::CSRet && Subtarget->isTargetDarwin()) + NumBytesForCalleeToPush = 4; + NodeTys.clear(); NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. Ops.clear(); Ops.push_back(Chain); Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); - Ops.push_back(DAG.getConstant(0, getPointerTy())); + Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy())); Ops.push_back(InFlag); Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); InFlag = Chain.getValue(1); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 0d93ec4294f..ea8793339f2 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -374,7 +374,7 @@ namespace llvm { void LowerCCCArguments(SDOperand Op, SelectionDAG &DAG); std::pair LowerCCCCallTo(SDOperand Chain, const Type *RetTy, bool isVarArg, - bool isTailCall, + bool isTailCall, unsigned CallingConv, SDOperand Callee, ArgListTy &Args, SelectionDAG &DAG); // Fast Calling Convention implementation.