diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ba6659357cc..c8c5daeda67 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2304,27 +2304,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Build a sequence of copy-to-reg nodes chained together with token chain - // and flag operands which copy the outgoing args into registers. - SDValue InFlag; - // Tail call byval lowering might overwrite argument registers so in case of - // tail call optimization the copies to registers are lowered later. - if (!isTailCall) - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - if (Subtarget->isPICStyleGOT()) { // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. if (!isTailCall) { - Chain = DAG.getCopyToReg(Chain, dl, X86::EBX, - DAG.getNode(X86ISD::GlobalBaseReg, - DebugLoc(), getPointerTy()), - InFlag); - InFlag = Chain.getValue(1); + RegsToPass.push_back(std::make_pair(unsigned(X86::EBX), + DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy()))); } else { // If we are tail calling and generating PIC/GOT style code load the // address of the callee into ECX. The value in ecx is used as target of @@ -2362,12 +2347,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert((Subtarget->hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"); - Chain = DAG.getCopyToReg(Chain, dl, X86::AL, - DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); - InFlag = Chain.getValue(1); + RegsToPass.push_back(std::make_pair(unsigned(X86::AL), + DAG.getConstant(NumXMMRegs, MVT::i8))); } - // For tail calls lower the arguments to the 'real' stack slot. if (isTailCall) { // Force all the incoming stack arguments to be loaded from the stack @@ -2381,8 +2364,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector MemOpChains2; SDValue FIN; int FI = 0; - // Do not flag preceding copytoreg stuff together with the following stuff. - InFlag = SDValue(); if (getTargetMachine().Options.GuaranteedTailCallOpt) { for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -2422,19 +2403,20 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains2[0], MemOpChains2.size()); - // Copy arguments to their registers. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - InFlag =SDValue(); - // Store the return address to the appropriate stack slot. Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit, FPDiff, dl); } + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into registers. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + if (getTargetMachine().getCodeModel() == CodeModel::Large) { assert(Is64Bit && "Large code model is only legal in 64-bit mode."); // In the 64-bit large code model, we have to make all calls @@ -2536,14 +2518,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); - // Add an implicit use GOT pointer in EBX. - if (!isTailCall && Subtarget->isPICStyleGOT()) - Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); - - // Add an implicit use of AL for non-Windows x86 64-bit vararg functions. - if (Is64Bit && isVarArg && !IsWin64) - Ops.push_back(DAG.getRegister(X86::AL, MVT::i8)); - // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll index c71c6ec81de..9badfc82e99 100644 --- a/test/CodeGen/X86/crash.ll +++ b/test/CodeGen/X86/crash.ll @@ -426,3 +426,19 @@ while.end: ; preds = %if.then.i256 return: ; preds = %entry ret i64 -131 } + +; The tail call to a varargs function sets %AL. +; uitofp expands to an FCMOV instruction which splits the basic block. +; Make sure the live range of %AL isn't split. +@.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32 +define void @pr13188(i64* nocapture %this) uwtable ssp address_safety align 2 { +entry: + %x7 = load i64* %this, align 8 + %sub = add i64 %x7, -1 + %conv = uitofp i64 %sub to float + %div = fmul float %conv, 5.000000e-01 + %conv2 = fpext float %div to double + tail call void (...)* @_Z6PrintFz(i8* getelementptr inbounds ({ [1 x i8], [63 x i8] }* @.str, i64 0, i32 0, i64 0), double %conv2) + ret void +} +declare void @_Z6PrintFz(...) diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll index c3f4278aecb..e9b8721e660 100644 --- a/test/CodeGen/X86/tailcall-largecode.ll +++ b/test/CodeGen/X86/tailcall-largecode.ll @@ -49,6 +49,11 @@ define fastcc i32 @direct_manyargs() { ; CHECK: pushq ; Pass the stack argument. ; CHECK: movl $7, 16(%rsp) +; This is the large code model, so &manyargs_callee may not fit into +; the jmp instruction. Put it into a register which won't be clobbered +; while restoring callee-saved registers and won't be used for passing +; arguments. +; CHECK: movabsq $manyargs_callee, %rax ; Pass the register arguments, in the right registers. ; CHECK: movl $1, %edi ; CHECK: movl $2, %esi @@ -56,11 +61,6 @@ define fastcc i32 @direct_manyargs() { ; CHECK: movl $4, %ecx ; CHECK: movl $5, %r8d ; CHECK: movl $6, %r9d -; This is the large code model, so &manyargs_callee may not fit into -; the jmp instruction. Put it into R11, which won't be clobbered -; while restoring callee-saved registers and won't be used for passing -; arguments. -; CHECK: movabsq $manyargs_callee, %rax ; Adjust the stack to "return". ; CHECK: popq ; And tail-call to the target.