diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index d836c29d6bd..88cfe10dfa9 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -204,6 +204,9 @@ namespace { bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); + bool SelectSingleRegAddr(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Scale, SDValue &Index, SDValue &Disp, + SDValue &Segment); bool SelectLEAAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); @@ -1319,6 +1322,31 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, return true; } +/// SelectSingleRegAddr - Like SelectAddr, but reject any address that would +/// require more than one allocatable register. +/// +/// This is used for a TCRETURNmi64 instruction when used to tail call a +/// variadic function with 6 arguments: Only %r11 is available from GR64_TC. +/// The other scratch register, %rax, is needed to pass in the number of vector +/// registers used in the variadic arguments. +/// +bool X86DAGToDAGISel::SelectSingleRegAddr(SDNode *Parent, SDValue N, + SDValue &Base, + SDValue &Scale, SDValue &Index, + SDValue &Disp, SDValue &Segment) { + if (!SelectAddr(Parent, N, Base, Scale, Index, Disp, Segment)) + return false; + // Anything %RIP relative is fine. + if (RegisterSDNode *Reg = dyn_cast(Base)) + if (Reg->getReg() == X86::RIP) + return true; + // Check that the index register is 0. + if (RegisterSDNode *Reg = dyn_cast(Index)) + if (Reg->getReg() == 0) + return true; + return false; +} + /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to /// match a load whose top elements are either undef or zeros. The load flavor /// is derived from the type of N, which is either v4f32 or v2f64. diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 99c2b8f955e..25404557856 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -1041,7 +1041,13 @@ def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, Requires<[In64BitMode]>; -def : Pat<(X86tcret (load addr:$dst), imm:$off), +// When calling a variadic function with 6 arguments, 7 scratch registers are +// needed since %al holds the number of vector registers used. That leaves %r11 +// as the only remaining GR64_TC register for the addressing mode. +// +// The single_reg_addr pattern rejects any addressing modes that would need +// more than one register. +def : Pat<(X86tcret (load single_reg_addr:$dst), imm:$off), (TCRETURNmi64 addr:$dst, imm:$off)>, Requires<[In64BitMode]>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index aabb442f741..b91f3c0ad45 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -543,6 +543,10 @@ def tls64addr : ComplexPattern; +// Same as addr, but reject addressing modes requiring more than one register. +def single_reg_addr : ComplexPattern; + //===----------------------------------------------------------------------===// // X86 Instruction Predicate Definitions. def HasCMov : Predicate<"Subtarget->hasCMov()">; diff --git a/test/CodeGen/X86/tailcall-64.ll b/test/CodeGen/X86/tailcall-64.ll index 70307534156..ffbc4b8b3f0 100644 --- a/test/CodeGen/X86/tailcall-64.ll +++ b/test/CodeGen/X86/tailcall-64.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc < %s -verify-machineinstrs | FileCheck %s target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin11.4.0" @@ -93,4 +93,38 @@ define { i64, i64 } @crash(i8* %this) { ret { i64, i64 } %mrv7 } +; Fold an indexed load into the tail call instruction. +; Calling a varargs function with 6 arguments requires 7 registers (%al is the +; vector count for varargs functions). This leaves %r11 as the only available +; scratch register. +; +; It is not possible to fold an indexed load into TCRETURNmi64 in that case. +; +; typedef int (*funcptr)(void*, ...); +; extern const funcptr funcs[]; +; int f(int n) { +; return funcs[n](0, 0, 0, 0, 0, 0); +; } +; +; CHECK: rdar12282281 +; CHECK: jmpq *%r11 # TAILCALL +@funcs = external constant [0 x i32 (i8*, ...)*] +define i32 @rdar12282281(i32 %n) nounwind uwtable ssp { +entry: + %idxprom = sext i32 %n to i64 + %arrayidx = getelementptr inbounds [0 x i32 (i8*, ...)*]* @funcs, i64 0, i64 %idxprom + %0 = load i32 (i8*, ...)** %arrayidx, align 8 + %call = tail call i32 (i8*, ...)* %0(i8* null, i32 0, i32 0, i32 0, i32 0, i32 0) nounwind + ret i32 %call +} + +; Same thing, using a fixed offset. The load should foid. +; CHECK: rdar12282281fixed +; CHECK: jmpq *8(%r11) # TAILCALL +define i32 @rdar12282281fixed() nounwind uwtable ssp { +entry: + %0 = load i32 (i8*, ...)** getelementptr inbounds ([0 x i32 (i8*, ...)*]* @funcs, i64 0, i64 1), align 8 + %call.i = tail call i32 (i8*, ...)* %0(i8* null, i32 0, i32 0, i32 0, i32 0, i32 0) nounwind + ret i32 %call.i +}