From b8720787015dc73d8a050b063366be6c3ad01946 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 4 Jul 2012 19:28:31 +0000 Subject: [PATCH] Ensure CopyToReg nodes are always glued to the call instruction. The CopyToReg nodes that set up the argument registers before a call must be glued to the call instruction. Otherwise, the scheduler may emit the physreg copies long before the call, causing long live ranges for the fixed registers. Besides disabling good register allocation, that can also expose problems when EmitInstrWithCustomInserter() splits a basic block during the live range of a physreg. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159721 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 52 +++++++------------------- test/CodeGen/X86/crash.ll | 16 ++++++++ test/CodeGen/X86/tailcall-largecode.ll | 10 ++--- 3 files changed, 34 insertions(+), 44 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ba6659357cc..c8c5daeda67 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2304,27 +2304,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Build a sequence of copy-to-reg nodes chained together with token chain - // and flag operands which copy the outgoing args into registers. - SDValue InFlag; - // Tail call byval lowering might overwrite argument registers so in case of - // tail call optimization the copies to registers are lowered later. - if (!isTailCall) - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - if (Subtarget->isPICStyleGOT()) { // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. if (!isTailCall) { - Chain = DAG.getCopyToReg(Chain, dl, X86::EBX, - DAG.getNode(X86ISD::GlobalBaseReg, - DebugLoc(), getPointerTy()), - InFlag); - InFlag = Chain.getValue(1); + RegsToPass.push_back(std::make_pair(unsigned(X86::EBX), + DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy()))); } else { // If we are tail calling and generating PIC/GOT style code load the // address of the callee into ECX. The value in ecx is used as target of @@ -2362,12 +2347,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert((Subtarget->hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"); - Chain = DAG.getCopyToReg(Chain, dl, X86::AL, - DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); - InFlag = Chain.getValue(1); + RegsToPass.push_back(std::make_pair(unsigned(X86::AL), + DAG.getConstant(NumXMMRegs, MVT::i8))); } - // For tail calls lower the arguments to the 'real' stack slot. if (isTailCall) { // Force all the incoming stack arguments to be loaded from the stack @@ -2381,8 +2364,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector MemOpChains2; SDValue FIN; int FI = 0; - // Do not flag preceding copytoreg stuff together with the following stuff. - InFlag = SDValue(); if (getTargetMachine().Options.GuaranteedTailCallOpt) { for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -2422,19 +2403,20 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains2[0], MemOpChains2.size()); - // Copy arguments to their registers. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - InFlag =SDValue(); - // Store the return address to the appropriate stack slot. Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit, FPDiff, dl); } + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into registers. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + if (getTargetMachine().getCodeModel() == CodeModel::Large) { assert(Is64Bit && "Large code model is only legal in 64-bit mode."); // In the 64-bit large code model, we have to make all calls @@ -2536,14 +2518,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); - // Add an implicit use GOT pointer in EBX. - if (!isTailCall && Subtarget->isPICStyleGOT()) - Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); - - // Add an implicit use of AL for non-Windows x86 64-bit vararg functions. - if (Is64Bit && isVarArg && !IsWin64) - Ops.push_back(DAG.getRegister(X86::AL, MVT::i8)); - // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll index c71c6ec81de..9badfc82e99 100644 --- a/test/CodeGen/X86/crash.ll +++ b/test/CodeGen/X86/crash.ll @@ -426,3 +426,19 @@ while.end: ; preds = %if.then.i256 return: ; preds = %entry ret i64 -131 } + +; The tail call to a varargs function sets %AL. +; uitofp expands to an FCMOV instruction which splits the basic block. +; Make sure the live range of %AL isn't split. +@.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32 +define void @pr13188(i64* nocapture %this) uwtable ssp address_safety align 2 { +entry: + %x7 = load i64* %this, align 8 + %sub = add i64 %x7, -1 + %conv = uitofp i64 %sub to float + %div = fmul float %conv, 5.000000e-01 + %conv2 = fpext float %div to double + tail call void (...)* @_Z6PrintFz(i8* getelementptr inbounds ({ [1 x i8], [63 x i8] }* @.str, i64 0, i32 0, i64 0), double %conv2) + ret void +} +declare void @_Z6PrintFz(...) diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll index c3f4278aecb..e9b8721e660 100644 --- a/test/CodeGen/X86/tailcall-largecode.ll +++ b/test/CodeGen/X86/tailcall-largecode.ll @@ -49,6 +49,11 @@ define fastcc i32 @direct_manyargs() { ; CHECK: pushq ; Pass the stack argument. ; CHECK: movl $7, 16(%rsp) +; This is the large code model, so &manyargs_callee may not fit into +; the jmp instruction. Put it into a register which won't be clobbered +; while restoring callee-saved registers and won't be used for passing +; arguments. +; CHECK: movabsq $manyargs_callee, %rax ; Pass the register arguments, in the right registers. ; CHECK: movl $1, %edi ; CHECK: movl $2, %esi @@ -56,11 +61,6 @@ define fastcc i32 @direct_manyargs() { ; CHECK: movl $4, %ecx ; CHECK: movl $5, %r8d ; CHECK: movl $6, %r9d -; This is the large code model, so &manyargs_callee may not fit into -; the jmp instruction. Put it into R11, which won't be clobbered -; while restoring callee-saved registers and won't be used for passing -; arguments. -; CHECK: movabsq $manyargs_callee, %rax ; Adjust the stack to "return". ; CHECK: popq ; And tail-call to the target.