diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 99c2b8f955e..75ceeb9b182 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -1017,7 +1017,24 @@ def : Pat<(X86call (i64 tglobaladdr:$dst)), def : Pat<(X86call (i64 texternalsym:$dst)), (CALL64pcrel32 texternalsym:$dst)>; -// tailcall stuff +// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they +// can never use callee-saved registers. That is the purpose of the GR64_TC +// register classes. +// +// The only volatile register that is never used by the calling convention is +// %r11. This happens when calling a vararg function with 6 arguments. +// +// Match an X86tcret that uses less than 7 volatile registers. +def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off), + (X86tcret node:$ptr, node:$off), [{ + // X86tcret args: (*chain, ptr, imm, regs..., glue) + unsigned NumRegs = 0; + for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i) + if (isa(N->getOperand(i)) && ++NumRegs > 6) + return false; + return true; +}]>; + def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>, Requires<[In32BitMode]>; @@ -1041,7 +1058,9 @@ def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, Requires<[In64BitMode]>; -def : Pat<(X86tcret (load addr:$dst), imm:$off), +// Don't fold loads into X86tcret requiring more than 6 regs. +// There wouldn't be enough scratch registers for base+index. +def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off), (TCRETURNmi64 addr:$dst, imm:$off)>, Requires<[In64BitMode]>; diff --git a/test/CodeGen/X86/tailcall-64.ll b/test/CodeGen/X86/tailcall-64.ll index 70307534156..df7af69a996 100644 --- a/test/CodeGen/X86/tailcall-64.ll +++ b/test/CodeGen/X86/tailcall-64.ll @@ -93,4 +93,43 @@ define { i64, i64 } @crash(i8* %this) { ret { i64, i64 } %mrv7 } +; Check that we can fold an indexed load into a tail call instruction. +; CHECK: fold_indexed_load +; CHECK: leaq (%rsi,%rsi,4), %[[RAX:r..]] +; CHECK: movq _func_table@GOTPCREL(%rip), %[[RCX:r..]] +; CHECK: jmpq *16(%[[RCX]],%[[RAX]],8) # TAILCALL +%struct.funcs = type { i32 (i8*, i32*, i32)*, i32 (i8*)*, i32 (i8*)*, i32 (i8*, i32)*, i32 } +@func_table = external global [0 x %struct.funcs] +define void @fold_indexed_load(i8* %mbstr, i64 %idxprom) nounwind uwtable ssp { +entry: + %dsplen = getelementptr inbounds [0 x %struct.funcs]* @func_table, i64 0, i64 %idxprom, i32 2 + %x1 = load i32 (i8*)** %dsplen, align 8 + %call = tail call i32 %x1(i8* %mbstr) nounwind + ret void +} +; Fold an indexed load into the tail call instruction. +; Calling a varargs function with 6 arguments requires 7 registers (%al is the +; vector count for varargs functions). This leaves %r11 as the only available +; scratch register. +; +; It is not possible to fold an indexed load into TCRETURNmi64 in that case. +; +; typedef int (*funcptr)(void*, ...); +; extern const funcptr funcs[]; +; int f(int n) { +; return funcs[n](0, 0, 0, 0, 0, 0); +; } +; +; CHECK: rdar12282281 +; CHECK: jmpq *%r11 # TAILCALL +@funcs = external constant [0 x i32 (i8*, ...)*] + +define i32 @rdar12282281(i32 %n) nounwind uwtable ssp { +entry: + %idxprom = sext i32 %n to i64 + %arrayidx = getelementptr inbounds [0 x i32 (i8*, ...)*]* @funcs, i64 0, i64 %idxprom + %0 = load i32 (i8*, ...)** %arrayidx, align 8 + %call = tail call i32 (i8*, ...)* %0(i8* null, i32 0, i32 0, i32 0, i32 0, i32 0) nounwind + ret i32 %call +}