mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-31 09:32:11 +00:00
Ensure CopyToReg nodes are always glued to the call instruction.
The CopyToReg nodes that set up the argument registers before a call must be glued to the call instruction. Otherwise, the scheduler may emit the physreg copies long before the call, causing long live ranges for the fixed registers. Besides disabling good register allocation, that can also expose problems when EmitInstrWithCustomInserter() splits a basic block during the live range of a physreg.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159721 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f86c00f1f8
commit
b872078701
@ -2304,27 +2304,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||
&MemOpChains[0], MemOpChains.size());
|
||||
|
||||
// Build a sequence of copy-to-reg nodes chained together with token chain
|
||||
// and flag operands which copy the outgoing args into registers.
|
||||
SDValue InFlag;
|
||||
// Tail call byval lowering might overwrite argument registers so in case of
|
||||
// tail call optimization the copies to registers are lowered later.
|
||||
if (!isTailCall)
|
||||
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
|
||||
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
|
||||
RegsToPass[i].second, InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
|
||||
if (Subtarget->isPICStyleGOT()) {
|
||||
// ELF / PIC requires GOT in the EBX register before function calls via PLT
|
||||
// GOT pointer.
|
||||
if (!isTailCall) {
|
||||
Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
|
||||
DAG.getNode(X86ISD::GlobalBaseReg,
|
||||
DebugLoc(), getPointerTy()),
|
||||
InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
|
||||
DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy())));
|
||||
} else {
|
||||
// If we are tail calling and generating PIC/GOT style code load the
|
||||
// address of the callee into ECX. The value in ecx is used as target of
|
||||
@ -2362,12 +2347,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
assert((Subtarget->hasSSE1() || !NumXMMRegs)
|
||||
&& "SSE registers cannot be used when SSE is disabled");
|
||||
|
||||
Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
|
||||
DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
|
||||
DAG.getConstant(NumXMMRegs, MVT::i8)));
|
||||
}
|
||||
|
||||
|
||||
// For tail calls lower the arguments to the 'real' stack slot.
|
||||
if (isTailCall) {
|
||||
// Force all the incoming stack arguments to be loaded from the stack
|
||||
@ -2381,8 +2364,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
SmallVector<SDValue, 8> MemOpChains2;
|
||||
SDValue FIN;
|
||||
int FI = 0;
|
||||
// Do not flag preceding copytoreg stuff together with the following stuff.
|
||||
InFlag = SDValue();
|
||||
if (getTargetMachine().Options.GuaranteedTailCallOpt) {
|
||||
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
||||
CCValAssign &VA = ArgLocs[i];
|
||||
@ -2422,19 +2403,20 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||
&MemOpChains2[0], MemOpChains2.size());
|
||||
|
||||
// Copy arguments to their registers.
|
||||
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
|
||||
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
|
||||
RegsToPass[i].second, InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
InFlag =SDValue();
|
||||
|
||||
// Store the return address to the appropriate stack slot.
|
||||
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
|
||||
FPDiff, dl);
|
||||
}
|
||||
|
||||
// Build a sequence of copy-to-reg nodes chained together with token chain
|
||||
// and flag operands which copy the outgoing args into registers.
|
||||
SDValue InFlag;
|
||||
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
|
||||
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
|
||||
RegsToPass[i].second, InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
|
||||
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
|
||||
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
|
||||
// In the 64-bit large code model, we have to make all calls
|
||||
@ -2536,14 +2518,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
|
||||
RegsToPass[i].second.getValueType()));
|
||||
|
||||
// Add an implicit use GOT pointer in EBX.
|
||||
if (!isTailCall && Subtarget->isPICStyleGOT())
|
||||
Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
|
||||
|
||||
// Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
|
||||
if (Is64Bit && isVarArg && !IsWin64)
|
||||
Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
|
||||
|
||||
// Add a register mask operand representing the call-preserved registers.
|
||||
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
|
||||
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
|
||||
|
@ -426,3 +426,19 @@ while.end: ; preds = %if.then.i256
|
||||
return: ; preds = %entry
|
||||
ret i64 -131
|
||||
}
|
||||
|
||||
; The tail call to a varargs function sets %AL.
|
||||
; uitofp expands to an FCMOV instruction which splits the basic block.
|
||||
; Make sure the live range of %AL isn't split.
|
||||
@.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32
|
||||
define void @pr13188(i64* nocapture %this) uwtable ssp address_safety align 2 {
|
||||
entry:
|
||||
%x7 = load i64* %this, align 8
|
||||
%sub = add i64 %x7, -1
|
||||
%conv = uitofp i64 %sub to float
|
||||
%div = fmul float %conv, 5.000000e-01
|
||||
%conv2 = fpext float %div to double
|
||||
tail call void (...)* @_Z6PrintFz(i8* getelementptr inbounds ({ [1 x i8], [63 x i8] }* @.str, i64 0, i32 0, i64 0), double %conv2)
|
||||
ret void
|
||||
}
|
||||
declare void @_Z6PrintFz(...)
|
||||
|
@ -49,6 +49,11 @@ define fastcc i32 @direct_manyargs() {
|
||||
; CHECK: pushq
|
||||
; Pass the stack argument.
|
||||
; CHECK: movl $7, 16(%rsp)
|
||||
; This is the large code model, so &manyargs_callee may not fit into
|
||||
; the jmp instruction. Put it into a register which won't be clobbered
|
||||
; while restoring callee-saved registers and won't be used for passing
|
||||
; arguments.
|
||||
; CHECK: movabsq $manyargs_callee, %rax
|
||||
; Pass the register arguments, in the right registers.
|
||||
; CHECK: movl $1, %edi
|
||||
; CHECK: movl $2, %esi
|
||||
@ -56,11 +61,6 @@ define fastcc i32 @direct_manyargs() {
|
||||
; CHECK: movl $4, %ecx
|
||||
; CHECK: movl $5, %r8d
|
||||
; CHECK: movl $6, %r9d
|
||||
; This is the large code model, so &manyargs_callee may not fit into
|
||||
; the jmp instruction. Put it into R11, which won't be clobbered
|
||||
; while restoring callee-saved registers and won't be used for passing
|
||||
; arguments.
|
||||
; CHECK: movabsq $manyargs_callee, %rax
|
||||
; Adjust the stack to "return".
|
||||
; CHECK: popq
|
||||
; And tail-call to the target.
|
||||
|
Loading…
Reference in New Issue
Block a user