Target/X86: Tweak win64's tailcall.

Enable tail calls on Win64: remove the blanket bail-out in
IsEligibleForTailCallOptimization, retarget the 64-bit TCRETURN/TAILJMP
pseudos to a new ptr_rc_tailcall operand class (resolved to GR64_TCW64 on
Win64, GR64_TC elsewhere), and add the extra registers clobbered by the
AMD64 convention as defs via a custom inserter.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124272 91177308-0d34-0410-b5e6-96231b3b80d8
NAKAMURA Takumi 2011-01-26 02:04:09 +00:00
parent 36c3bc431b
commit 7754f85885
9 changed files with 83 additions and 18 deletions

View File

@@ -2476,9 +2476,6 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
     MachineFunction &MF = DAG.getMachineFunction();
     if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
       return false;
-    if (Subtarget->isTargetWin64())
-      // Win64 ABI has additional complications.
-      return false;
     // Check if the arguments are already laid out in the right way as
     // the caller's fixed stack objects.
@@ -10078,6 +10075,30 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB) const {
   switch (MI->getOpcode()) {
   default: assert(false && "Unexpected instr type to insert");
+  case X86::TAILJMPd64:
+  case X86::TAILJMPr64:
+  case X86::TAILJMPm64:
+    assert(!"TAILJMP64 should not be touched here.");
+  case X86::TCRETURNdi64:
+  case X86::TCRETURNri64:
+  case X86::TCRETURNmi64:
+    // The static Defs of TCRETURNxx64 cover only Win64's (smaller) clobber
+    // set. The registers Win64 treats as callee-saved (RSI, RDI, and
+    // XMM6-XMM15) are clobbered by the AMD64 convention, so add them as
+    // defs here, before register allocation.
+    if (!Subtarget->isTargetWin64()) {
+      MI->addRegisterDefined(X86::RSI);
+      MI->addRegisterDefined(X86::RDI);
+      MI->addRegisterDefined(X86::XMM6);
+      MI->addRegisterDefined(X86::XMM7);
+      MI->addRegisterDefined(X86::XMM8);
+      MI->addRegisterDefined(X86::XMM9);
+      MI->addRegisterDefined(X86::XMM10);
+      MI->addRegisterDefined(X86::XMM11);
+      MI->addRegisterDefined(X86::XMM12);
+      MI->addRegisterDefined(X86::XMM13);
+      MI->addRegisterDefined(X86::XMM14);
+      MI->addRegisterDefined(X86::XMM15);
+    }
+    return BB;
   case X86::WIN_ALLOCA:
     return EmitLoweredWinAlloca(MI, BB);
   case X86::TLSCall_32:
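
Annotation (not part of the commit): addRegisterDefined, used in the hunk
above, appends an implicit def unless the instruction already defines the
register. A paraphrased sketch of that helper, assuming the MachineInstr
API of this era:

void MachineInstr::addRegisterDefined(unsigned IncomingReg,
                                      const TargetRegisterInfo *RegInfo) {
  // If the register is already defined, just make sure it is not marked
  // dead; otherwise append a new implicit-def operand for it.
  if (MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo)) {
    MO->setIsDead(false);
    return;
  }
  addOperand(MachineOperand::CreateReg(IncomingReg,
                                       /*isDef=*/true, /*isImp=*/true));
}

Adding these defs before register allocation means later passes see the
full AMD64 clobber set on the tail-call terminator, complementing the
restricted ptr_rc_tailcall operand class introduced below.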

View File

@@ -866,8 +866,8 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
           (TCRETURNdi texternalsym:$dst, imm:$off)>,
           Requires<[In32BitMode]>;
-def : Pat<(X86tcret GR64_TC:$dst, imm:$off),
-          (TCRETURNri64 GR64_TC:$dst, imm:$off)>,
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+          (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
           Requires<[In64BitMode]>;
 def : Pat<(X86tcret (load addr:$dst), imm:$off),

View File

@@ -266,17 +266,18 @@ let isCall = 1, isCodeGenOnly = 1 in
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
     isCodeGenOnly = 1 in
-  let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+  // AMD64 cc clobbers RSI, RDI, XMM6-XMM15.
+  let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
               FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
               MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-      Uses = [RSP] in {
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+      Uses = [RSP],
+      usesCustomInserter = 1 in {
   def TCRETURNdi64 : PseudoI<(outs),
                              (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
                              []>;
   def TCRETURNri64 : PseudoI<(outs),
-                             (ins GR64_TC:$dst, i32imm:$offset, variable_ops), []>;
+                             (ins ptr_rc_tailcall:$dst, i32imm:$offset, variable_ops), []>;
   let mayLoad = 1 in
   def TCRETURNmi64 : PseudoI<(outs),
                              (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), []>;

@@ -284,7 +285,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
   def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
                              (ins i64i32imm_pcrel:$dst, variable_ops),
                              "jmp\t$dst # TAILCALL", []>;
-  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
+  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops),
                     "jmp{q}\t{*}$dst # TAILCALL", []>;
   let mayLoad = 1 in

View File

@@ -2025,6 +2025,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
   case X86::GR64_NOREX_NOSPRegClassID:
   case X86::GR64_NOSPRegClassID:
   case X86::GR64_TCRegClassID:
+  case X86::GR64_TCW64RegClassID:
     return load ? X86::MOV64rm : X86::MOV64mr;
   case X86::GR32RegClassID:
   case X86::GR32_ABCDRegClassID:

View File

@@ -273,6 +273,10 @@ def i8mem_NOREX : Operand<i64> {
   let ParserMatchClass = X86MemAsmOperand;
 }
+
+// GPRs available for tail calls; resolves to GR64_TC or GR64_TCW64.
+def ptr_rc_tailcall : PointerLikeRegClass<2>;
+
 // Special i32mem for addresses of load folding tail calls. These are not
 // allowed to use callee-saved registers since they must be scheduled
 // after callee-saved registers are popped.

@@ -287,7 +291,8 @@ def i32mem_TC : Operand<i32> {
 // after callee-saved registers are popped.
 def i64mem_TC : Operand<i64> {
   let PrintMethod = "printi64mem";
-  let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm);
+  let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
+                           ptr_rc_tailcall, i32imm, i8imm);
   let ParserMatchClass = X86MemAsmOperand;
 }
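
Annotation (not part of the commit): a PointerLikeRegClass operand defers
the register-class choice from TableGen to C++ at query time. A paraphrased
sketch of the lookup, assuming the TargetOperandInfo interface of this era;
the kind index (2, from the def above) is handed to the getPointerRegClass
hook patched in the next file:

const TargetRegisterClass *
TargetOperandInfo::getRegClass(const TargetRegisterInfo *TRI) const {
  // For a PointerLikeRegClass operand, RegClass holds the Kind value and
  // the target picks the concrete class per subtarget (here: GR64_TCW64
  // on Win64, GR64_TC on other 64-bit targets, GR32_TC in 32-bit mode).
  if (isLookupPtrRegClass())
    return TRI->getPointerRegClass(RegClass);
  // Otherwise RegClass is an ordinary fixed register class ID.
  return TRI->getRegClass(RegClass);
}

This is what lets a single TCRETURNri64/TAILJMPr64 definition serve both
calling conventions.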

View File

@@ -320,6 +320,12 @@ X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
     if (TM.getSubtarget<X86Subtarget>().is64Bit())
       return &X86::GR64_NOSPRegClass;
     return &X86::GR32_NOSPRegClass;
+  case 2: // Available for tailcall (not callee-saved GPRs).
+    if (TM.getSubtarget<X86Subtarget>().isTargetWin64())
+      return &X86::GR64_TCW64RegClass;
+    if (TM.getSubtarget<X86Subtarget>().is64Bit())
+      return &X86::GR64_TCRegClass;
+    return &X86::GR32_TCRegClass;
   }
 }

View File

@@ -496,6 +496,9 @@ def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
                        (GR32_TC sub_32bit)];
 }
+
+def GR64_TCW64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX,
+                                                  R8, R9, R11]>;
 // GR8_NOREX - GR8 registers which do not require a REX prefix.
 def GR8_NOREX : RegisterClass<"X86", [i8], 8,
                               [AL, CL, DL, AH, CH, DH, BL, BH]> {

View File

@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=AMD64
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+
+; PR8743
+; TAILJMPri64 should not receive "callee-saved" registers beyond the epilogue.
+
+; AMD64: jmpq
+; AMD64-NOT: %{{e[a-z]|rbx|rbp|r10|r12|r13|r14|r15}}
+
+; WIN64: jmpq
+; WIN64-NOT: %{{e[a-z]|rbx|rsi|rdi|rbp|r12|r13|r14|r15}}
+
+%class = type { [8 x i8] }
+%vt = type { i32 (...)** }
+
+define %vt* @_ZN4llvm9UnsetInit20convertInitializerToEPNS_5RecTyE(%class* %this, %vt* %Ty) align 2 {
+entry:
+  %0 = bitcast %vt* %Ty to %vt* (%vt*, %class*)***
+  %vtable = load %vt* (%vt*, %class*)*** %0, align 8
+  %vfn = getelementptr inbounds %vt* (%vt*, %class*)** %vtable, i64 4
+  %1 = load %vt* (%vt*, %class*)** %vfn, align 8
+  %call = tail call %vt* %1(%vt* %Ty, %class* %this)
+  ret %vt* %call
+}

View File

@@ -1,16 +1,20 @@
-; RUN: llc < %s -tailcallopt -march=x86-64 -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
+
+; FIXME: Redundant unused stack allocation could be eliminated.
+; CHECK: subq ${{24|88}}, %rsp
 ; Check that lowered arguments on the stack do not overwrite each other.
 ; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: movl 32(%rsp), %eax
+; CHECK: movl [[A1:32|144]](%rsp), %eax
 ; Move param %in1 to temp register (%r10d).
-; CHECK: movl 40(%rsp), %r10d
+; CHECK: movl [[A2:40|152]](%rsp), %r10d
 ; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: addl %edi, %eax
+; CHECK: addl {{%edi|%ecx}}, %eax
 ; Move param %in2 to stack.
-; CHECK: movl %r10d, 32(%rsp)
+; CHECK: movl %r10d, [[A1]](%rsp)
 ; Move result of addition to stack.
-; CHECK: movl %eax, 40(%rsp)
+; CHECK: movl %eax, [[A2]](%rsp)
 ; Eventually, do a TAILCALL
 ; CHECK: TAILCALL