diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 7f99203a833..e9fcbd5a489 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -215,50 +215,6 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>> ]>; -// Tail call convention (fast): One register is reserved for target address, -// namely R9 -def CC_X86_64_TailCall : CallingConv<[ - // Handles byval parameters. - CCIfByVal>, - - // Promote i8/i16 arguments to i32. - CCIfType<[i8, i16], CCPromoteToType>, - - // The 'nest' parameter, if any, is passed in R10. - CCIfNest>, - - // The first 6 integer arguments are passed in integer registers. - CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>, - CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>, - - // The first 8 FP/Vector arguments are passed in XMM registers. - CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfSubtarget<"hasSSE1()", - CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, - - // The first 8 MMX (except for v1i64) vector arguments are passed in XMM - // registers on Darwin. - CCIfType<[v8i8, v4i16, v2i32, v2f32], - CCIfSubtarget<"isTargetDarwin()", - CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, - - // The first 8 v1i64 vector arguments are passed in GPRs on Darwin. - CCIfType<[v1i64], - CCIfSubtarget<"isTargetDarwin()", - CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>, - - // Integer/FP values get stored in stack slots that are 8 bytes in size and - // 8-byte aligned if there are no more registers to hold them. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, - - // Vectors get 16-byte stack slots that are 16-byte aligned. - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, - - // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> -]>; - - //===----------------------------------------------------------------------===// // X86 C Calling Convention //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 33332e4cf74..2bcfd76a8cf 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -171,8 +171,6 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) { if (Subtarget->is64Bit()) { if (Subtarget->isTargetWin64()) return CC_X86_Win64_C; - else if (CC == CallingConv::Fast && isTaillCall) - return CC_X86_64_TailCall; else return CC_X86_64_C; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2f94b67849e..36e3ab2e82a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -944,7 +944,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { SDValue StackAdjustment = TailCall.getOperand(2); assert(((TargetAddress.getOpcode() == ISD::Register && (cast(TargetAddress)->getReg() == X86::EAX || - cast(TargetAddress)->getReg() == X86::R9)) || + cast(TargetAddress)->getReg() == X86::R11)) || TargetAddress.getOpcode() == ISD::TargetExternalSymbol || TargetAddress.getOpcode() == ISD::TargetGlobalAddress) && "Expecting an global address, external symbol, or register"); @@ -1171,8 +1171,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const { if (Subtarget->is64Bit()) { if (Subtarget->isTargetWin64()) return CC_X86_Win64_C; - else if (CC == CallingConv::Fast && PerformTailCallOpt) - return CC_X86_64_TailCall; else return CC_X86_64_C; } @@ -1799,7 +1797,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); } else if (IsTailCall) { - unsigned Opc = Is64Bit ? X86::R9 : X86::EAX; + unsigned Opc = Is64Bit ? X86::R11 : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Opc, getPointerTy()), diff --git a/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll b/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll new file mode 100644 index 00000000000..001b7fc5a4a --- /dev/null +++ b/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll @@ -0,0 +1,15 @@ +; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep fstpt +; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep xmm + +; Check that x86-64 tail calls support x86_fp80 and v2f32 types. (Tail call +; calling convention out of sync with standard c calling convention on x86_64) +; Bug 4278. + +declare fastcc double @tailcallee(x86_fp80, <2 x float>) + +define fastcc double @tailcall() { +entry: + %tmp = fpext float 1.000000e+00 to x86_fp80 + %tmp2 = tail call fastcc double @tailcallee( x86_fp80 %tmp, <2 x float> ) + ret double %tmp2 +} diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll index d734065c005..c81327e5143 100644 --- a/test/CodeGen/X86/tailcallstack64.ll +++ b/test/CodeGen/X86/tailcallstack64.ll @@ -3,19 +3,19 @@ ; Move param %in1 to temp register (%eax). ; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl 40(%rsp), %eax} ; Add %in1 %p1 to another temporary register (%r9d). -; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %edi, %r9d} -; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {addl 32(%rsp), %r9d} +; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %edi, %r10d} +; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {addl 32(%rsp), %r10d} ; Move result of addition to stack. -; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %r9d, 40(%rsp)} +; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %r10d, 40(%rsp)} ; Move param %in2 to stack. ; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl %eax, 32(%rsp)} -declare fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %a, i32 %b) +declare fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %a, i32 %b) -define fastcc i32 @tailcaller(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %in1, i32 %in2) { +define fastcc i32 @tailcaller(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in1, i32 %in2) { entry: %tmp = add i32 %in1, %p1 - %retval = tail call fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %in2,i32 %tmp) + %retval = tail call fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in2,i32 %tmp) ret i32 %retval }