Fix Bug 4278: X86-64 with -tailcallopt calling convention

out of sync with regular cc.

The only difference between the tail call cc and the normal
cc was that one parameter register - R9 - was reserved for
calling functions through a function pointer. After time the
tail call cc has gotten out of sync with the regular cc. 

We can use R11 which is also caller saved but not used as
parameter register for potential function pointers and
remove the special tail call cc on x86-64.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@73233 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Arnold Schwaighofer 2009-06-12 16:26:57 +00:00
parent b3c36c9c9a
commit bbd8c33ee6
5 changed files with 23 additions and 56 deletions

View File

@ -215,50 +215,6 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>>
]>;
// Tail call convention (fast): One register is reserved for target address,
// namely R9
def CC_X86_64_TailCall : CallingConv<[
// Handles byval parameters.
CCIfByVal<CCPassByVal<8, 8>>,
// Promote i8/i16 arguments to i32.
CCIfType<[i8, i16], CCPromoteToType<i32>>,
// The 'nest' parameter, if any, is passed in R10.
CCIfNest<CCAssignToReg<[R10]>>,
// The first 6 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>,
CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>,
// The first 8 FP/Vector arguments are passed in XMM registers.
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
// The first 8 MMX (except for v1i64) vector arguments are passed in XMM
// registers on Darwin.
CCIfType<[v8i8, v4i16, v2i32, v2f32],
CCIfSubtarget<"isTargetDarwin()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
// The first 8 v1i64 vector arguments are passed in GPRs on Darwin.
CCIfType<[v1i64],
CCIfSubtarget<"isTargetDarwin()",
CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
// __m64 vectors get 8-byte stack slots that are 8-byte aligned.
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
]>;
//===----------------------------------------------------------------------===//
// X86 C Calling Convention
//===----------------------------------------------------------------------===//

View File

@ -171,8 +171,6 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) {
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
else if (CC == CallingConv::Fast && isTaillCall)
return CC_X86_64_TailCall;
else
return CC_X86_64_C;
}

View File

@ -944,7 +944,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
SDValue StackAdjustment = TailCall.getOperand(2);
assert(((TargetAddress.getOpcode() == ISD::Register &&
(cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R11)) ||
TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
"Expecting an global address, external symbol, or register");
@ -1171,8 +1171,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
else if (CC == CallingConv::Fast && PerformTailCallOpt)
return CC_X86_64_TailCall;
else
return CC_X86_64_C;
}
@ -1799,7 +1797,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
} else if (IsTailCall) {
unsigned Opc = Is64Bit ? X86::R9 : X86::EAX;
unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl,
DAG.getRegister(Opc, getPointerTy()),

View File

@ -0,0 +1,15 @@
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep fstpt
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep xmm
; Check that x86-64 tail calls support x86_fp80 and v2f32 types. (Tail call
; calling convention out of sync with standard c calling convention on x86_64)
; Bug 4278.
declare fastcc double @tailcallee(x86_fp80, <2 x float>)
define fastcc double @tailcall() {
entry:
%tmp = fpext float 1.000000e+00 to x86_fp80
%tmp2 = tail call fastcc double @tailcallee( x86_fp80 %tmp, <2 x float> <float 1.000000e+00, float 1.000000e+00>)
ret double %tmp2
}

View File

@ -3,19 +3,19 @@
; Move param %in1 to temp register (%eax).
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl 40(%rsp), %eax}
; Add %in1 %p1 to another temporary register (%r9d).
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %edi, %r9d}
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {addl 32(%rsp), %r9d}
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %edi, %r10d}
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {addl 32(%rsp), %r10d}
; Move result of addition to stack.
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %r9d, 40(%rsp)}
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %r10d, 40(%rsp)}
; Move param %in2 to stack.
; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %eax, 32(%rsp)}
declare fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %a, i32 %b)
declare fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %a, i32 %b)
define fastcc i32 @tailcaller(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %in1, i32 %in2) {
define fastcc i32 @tailcaller(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in1, i32 %in2) {
entry:
%tmp = add i32 %in1, %p1
%retval = tail call fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %in2,i32 %tmp)
%retval = tail call fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in2,i32 %tmp)
ret i32 %retval
}