From 7754f85885f8a961cb403ef13ab39583492d2b1e Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi
Date: Wed, 26 Jan 2011 02:04:09 +0000
Subject: [PATCH] Target/X86: Tweak win64's tailcall.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124272 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp  | 27 ++++++++++++++++++++++++---
 lib/Target/X86/X86InstrCompiler.td  |  4 ++--
 lib/Target/X86/X86InstrControl.td   | 13 +++++++------
 lib/Target/X86/X86InstrInfo.cpp     |  1 +
 lib/Target/X86/X86InstrInfo.td      |  7 ++++++-
 lib/Target/X86/X86RegisterInfo.cpp  |  6 ++++++
 lib/Target/X86/X86RegisterInfo.td   |  3 +++
 test/CodeGen/X86/tailcall-ri64.ll   | 24 ++++++++++++++++++++++++
 test/CodeGen/X86/tailcallstack64.ll | 16 ++++++++++------
 9 files changed, 83 insertions(+), 18 deletions(-)
 create mode 100644 test/CodeGen/X86/tailcall-ri64.ll

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index edd535e43cd..3fcb5e7a8d3 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2476,9 +2476,6 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
     MachineFunction &MF = DAG.getMachineFunction();
     if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
       return false;
-    if (Subtarget->isTargetWin64())
-      // Win64 ABI has additional complications.
-      return false;
 
     // Check if the arguments are already laid out in the right way as
     // the caller's fixed stack objects.
@@ -10078,6 +10075,30 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB) const {
   switch (MI->getOpcode()) {
   default: assert(false && "Unexpected instr type to insert");
+  case X86::TAILJMPd64:
+  case X86::TAILJMPr64:
+  case X86::TAILJMPm64:
+    assert(!"TAILJMP64 should not reach the custom inserter.");
+  case X86::TCRETURNdi64:
+  case X86::TCRETURNri64:
+  case X86::TCRETURNmi64:
+    // The Defs of the TCRETURNxx64 pseudos cover only Win64's clobbers,
+    // a subset of AMD64's. On AMD64, the extra defs must be added here,
+    // before register allocation.
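+    // (Win64 keeps RSI, RDI and XMM6-XMM15 callee-saved, so they stay
+    // live across the tail call and must not be listed as defs.)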
+    if (!Subtarget->isTargetWin64()) {
+      MI->addRegisterDefined(X86::RSI);
+      MI->addRegisterDefined(X86::RDI);
+      MI->addRegisterDefined(X86::XMM6);
+      MI->addRegisterDefined(X86::XMM7);
+      MI->addRegisterDefined(X86::XMM8);
+      MI->addRegisterDefined(X86::XMM9);
+      MI->addRegisterDefined(X86::XMM10);
+      MI->addRegisterDefined(X86::XMM11);
+      MI->addRegisterDefined(X86::XMM12);
+      MI->addRegisterDefined(X86::XMM13);
+      MI->addRegisterDefined(X86::XMM14);
+      MI->addRegisterDefined(X86::XMM15);
+    }
+    return BB;
   case X86::WIN_ALLOCA:
     return EmitLoweredWinAlloca(MI, BB);
   case X86::TLSCall_32:
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 70478b8e4a9..f72eaa75529 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -866,8 +866,8 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
           (TCRETURNdi texternalsym:$dst, imm:$off)>,
           Requires<[In32BitMode]>;
 
-def : Pat<(X86tcret GR64_TC:$dst, imm:$off),
-          (TCRETURNri64 GR64_TC:$dst, imm:$off)>,
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+          (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
           Requires<[In64BitMode]>;
 
 def : Pat<(X86tcret (load addr:$dst), imm:$off),
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 4d1c5f74091..77f47250e9f 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -266,17 +266,18 @@ let isCall = 1, isCodeGenOnly = 1 in
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
     isCodeGenOnly = 1 in
-  let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+  // AMD64 cc clobbers RSI, RDI, XMM6-XMM15.
+  let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
               FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
               MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-      Uses = [RSP] in {
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+      Uses = [RSP],
+      usesCustomInserter = 1 in {
   def TCRETURNdi64 : PseudoI<(outs),
                              (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>;
   def TCRETURNri64 : PseudoI<(outs),
-                             (ins GR64_TC:$dst, i32imm:$offset, variable_ops), []>;
+                             (ins ptr_rc_tailcall:$dst, i32imm:$offset, variable_ops), []>;
   let mayLoad = 1 in
   def TCRETURNmi64 : PseudoI<(outs),
                              (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), []>;
@@ -284,7 +285,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
   def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
                              (ins i64i32imm_pcrel:$dst, variable_ops),
                              "jmp\t$dst # TAILCALL", []>;
-  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
+  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops),
                      "jmp{q}\t{*}$dst # TAILCALL", []>;
   let mayLoad = 1 in
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 63dcd143b5d..ceb1b653982 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2025,6 +2025,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
   case X86::GR64_NOREX_NOSPRegClassID:
   case X86::GR64_NOSPRegClassID:
   case X86::GR64_TCRegClassID:
+  case X86::GR64_TCW64RegClassID:
     return load ? X86::MOV64rm : X86::MOV64mr;
   case X86::GR32RegClassID:
   case X86::GR32_ABCDRegClassID:
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 4748f130ed8..14c90662d02 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -273,6 +273,10 @@ def i8mem_NOREX : Operand<i64> {
   let ParserMatchClass = X86MemAsmOperand;
 }
 
+// GPRs available for tail calls.
+// Resolves to GR64_TC or GR64_TCW64, depending on the subtarget.
+def ptr_rc_tailcall : PointerLikeRegClass<2>;
+
 // Special i32mem for addresses of load-folding tail calls. These are not
 // allowed to use callee-saved registers since they must be scheduled
 // after callee-saved registers are popped.
@@ -287,7 +291,8 @@ def i32mem_TC : Operand<i32> {
 // after callee-saved registers are popped.
 def i64mem_TC : Operand<i64> {
   let PrintMethod = "printi64mem";
-  let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm);
+  let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
+                       ptr_rc_tailcall, i32imm, i8imm);
   let ParserMatchClass = X86MemAsmOperand;
 }
 
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index f41be0cfe5a..2f6bd88c652 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -320,6 +320,12 @@ X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
     if (TM.getSubtarget<X86Subtarget>().is64Bit())
       return &X86::GR64_NOSPRegClass;
     return &X86::GR32_NOSPRegClass;
+  case 2: // Available for tail calls (caller-saved GPRs only).
+    if (TM.getSubtarget<X86Subtarget>().isTargetWin64())
+      return &X86::GR64_TCW64RegClass;
+    if (TM.getSubtarget<X86Subtarget>().is64Bit())
+      return &X86::GR64_TCRegClass;
+    return &X86::GR32_TCRegClass;
   }
 }
 
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 45bb9898b84..612fac2f3be 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -496,6 +496,9 @@ def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
                        (GR32_TC sub_32bit)];
 }
 
+def GR64_TCW64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX,
+                                                  R8, R9, R11]>;
+
 // GR8_NOREX - GR8 registers which do not require a REX prefix.
 def GR8_NOREX : RegisterClass<"X86", [i8], 8, [AL, CL, DL,
                                                AH, CH, DH, BL, BH]> {
diff --git a/test/CodeGen/X86/tailcall-ri64.ll b/test/CodeGen/X86/tailcall-ri64.ll
new file mode 100644
index 00000000000..914d8f7b8bc
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-ri64.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=AMD64
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; PR8743
+; TAILJMPri64 must not use a callee-saved register, since those are
+; restored by the epilogue before the jump.
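+; (AMD64 and WIN64 check different registers because the volatile sets
+; differ: SysV AMD64 may place the jump target in RSI or RDI; Win64 may not.)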
+ +; AMD64: jmpq +; AMD64-NOT: %{{e[a-z]|rbx|rbp|r10|r12|r13|r14|r15}} + +; WIN64: jmpq +; WIN64-NOT: %{{e[a-z]|rbx|rsi|rdi|rbp|r12|r13|r14|r15}} + +%class = type { [8 x i8] } +%vt = type { i32 (...)** } + +define %vt* @_ZN4llvm9UnsetInit20convertInitializerToEPNS_5RecTyE(%class* +%this, %vt* %Ty) align 2 { +entry: + %0 = bitcast %vt* %Ty to %vt* (%vt*, %class*)*** + %vtable = load %vt* (%vt*, %class*)*** %0, align 8 + %vfn = getelementptr inbounds %vt* (%vt*, %class*)** %vtable, i64 4 + %1 = load %vt* (%vt*, %class*)** %vfn, align 8 + %call = tail call %vt* %1(%vt* %Ty, %class* %this) + ret %vt* %call +} diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll index 52b074def57..0c732d56b6c 100644 --- a/test/CodeGen/X86/tailcallstack64.ll +++ b/test/CodeGen/X86/tailcallstack64.ll @@ -1,16 +1,20 @@ -; RUN: llc < %s -tailcallopt -march=x86-64 -post-RA-scheduler=true | FileCheck %s +; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s +; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s + +; FIXME: Redundant unused stack allocation could be eliminated. +; CHECK: subq ${{24|88}}, %rsp ; Check that lowered arguments on the stack do not overwrite each other. ; Add %in1 %p1 to a different temporary register (%eax). -; CHECK: movl 32(%rsp), %eax +; CHECK: movl [[A1:32|144]](%rsp), %eax ; Move param %in1 to temp register (%r10d). -; CHECK: movl 40(%rsp), %r10d +; CHECK: movl [[A2:40|152]](%rsp), %r10d ; Add %in1 %p1 to a different temporary register (%eax). -; CHECK: addl %edi, %eax +; CHECK: addl {{%edi|%ecx}}, %eax ; Move param %in2 to stack. -; CHECK: movl %r10d, 32(%rsp) +; CHECK: movl %r10d, [[A1]](%rsp) ; Move result of addition to stack. -; CHECK: movl %eax, 40(%rsp) +; CHECK: movl %eax, [[A2]](%rsp) ; Eventually, do a TAILCALL ; CHECK: TAILCALL
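
Note on the mechanism: the TableGen side records only an opaque kind number
(ptr_rc_tailcall is PointerLikeRegClass<2>), and
X86RegisterInfo::getPointerRegClass maps that kind to a concrete register
class per subtarget. The standalone C++ sketch below restates that dispatch;
the enum and function names are invented for illustration, but the register
class contents match the patch.

    // Sketch of the Kind == 2 dispatch added in
    // X86RegisterInfo::getPointerRegClass. Names are hypothetical.
    #include <cstdio>

    enum class ABI { X86_32, SysV64, Win64 };
    enum class RC  { GR32_TC, GR64_TC, GR64_TCW64 };

    // A tail-call target must live in a caller-saved register: the
    // epilogue restores callee-saved registers before the final jmp,
    // so a callee-saved target would be overwritten.
    RC tailCallRegClass(ABI Abi) {
      switch (Abi) {
      case ABI::Win64:
        return RC::GR64_TCW64;   // RAX, RCX, RDX, R8, R9, R11
      case ABI::SysV64:
        return RC::GR64_TC;      // adds RSI, RDI (volatile on SysV)
      case ABI::X86_32:
        return RC::GR32_TC;
      }
      return RC::GR32_TC;        // unreachable; silences -Wreturn-type
    }

    int main() {
      std::printf("win64 differs from sysv: %d\n",
                  tailCallRegClass(ABI::Win64) != tailCallRegClass(ABI::SysV64));
    }

This is also why the custom inserter widens the defs only when the target is
not Win64: the static Defs list of the TCRETURNxx64 pseudos was written for
the narrower Win64 clobber set.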