x86: Fix large model calls to __chkstk for dynamic allocas

In the large code model, we now put __chkstk in %r11 before calling it.

Refactor the code so that we only do this once. Simplify things by using
___chkstk_ms instead of ___chkstk on cygming. We already use that symbol
in the prologue emission, so dynamic allocas can share the same logic.

Second half of PR18582.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227519 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Reid Kleckner 2015-01-29 23:58:04 +00:00
parent 5d51c0ee04
commit c9fbc97e95
5 changed files with 84 additions and 87 deletions

View File

@ -408,18 +408,25 @@ static bool usesTheStack(const MachineFunction &MF) {
return false;
}
void X86FrameLowering::getStackProbeFunction(const MachineFunction &MF,
const X86Subtarget &STI,
unsigned &CallOp,
const char *&Symbol) {
if (STI.is64Bit())
CallOp = MF.getTarget().getCodeModel() == CodeModel::Large
? X86::CALL64r
: X86::CALL64pcrel32;
void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc DL) {
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
unsigned CallOp;
if (Is64Bit)
CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
else
CallOp = X86::CALLpcrel32;
if (STI.is64Bit()) {
const char *Symbol;
if (Is64Bit) {
if (STI.isTargetCygMing()) {
Symbol = "___chkstk_ms";
} else {
@ -429,6 +436,37 @@ void X86FrameLowering::getStackProbeFunction(const MachineFunction &MF,
Symbol = "_alloca";
else
Symbol = "_chkstk";
MachineInstrBuilder CI;
// All current stack probes take AX and SP as input, clobber flags, and
// preserve all registers. x86_64 probes leave RSP unmodified.
if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
// For the large code model, we have to call through a register. Use R11,
// as it is scratch in all supported calling conventions.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
.addExternalSymbol(Symbol);
CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
} else {
CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol);
}
unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
CI.addReg(AX, RegState::Implicit)
.addReg(SP, RegState::Implicit)
.addReg(AX, RegState::Define | RegState::Implicit)
.addReg(SP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
if (Is64Bit) {
// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
// themselves. They also do not clobber %rax, so we can reuse it when
// adjusting %rsp.
BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
.addReg(X86::RSP)
.addReg(X86::RAX);
}
}
/// emitPrologue - Push callee-saved registers onto the stack, which
@ -761,11 +799,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
if (NumBytes >= StackProbeSize && UseStackProbe) {
const char *StackProbeSymbol;
unsigned CallOp;
getStackProbeFunction(MF, STI, CallOp, StackProbeSymbol);
// Check whether EAX is livein for this function.
bool isEAXAlive = isEAXLiveIn(MF);
@ -794,33 +827,17 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.setMIFlag(MachineInstr::FrameSetup);
}
if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
// For the large code model, we have to call through a register. Use R11,
// as it is unused and clobbered by all probe functions.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
.addExternalSymbol(StackProbeSymbol);
BuildMI(MBB, MBBI, DL, TII.get(CallOp))
.addReg(X86::R11)
.addReg(StackPtr, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
.setMIFlag(MachineInstr::FrameSetup);
} else {
BuildMI(MBB, MBBI, DL, TII.get(CallOp))
.addExternalSymbol(StackProbeSymbol)
.addReg(StackPtr, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
.setMIFlag(MachineInstr::FrameSetup);
}
// Save a pointer to the MI where we set AX.
MachineBasicBlock::iterator SetRAX = MBBI;
--SetRAX;
// Call __chkstk, __chkstk_ms, or __alloca.
emitStackProbeCall(MF, MBB, MBBI, DL);
// Apply the frame setup flag to all inserted instrs.
for (; SetRAX != MBBI; ++SetRAX)
SetRAX->setFlag(MachineInstr::FrameSetup);
if (Is64Bit) {
// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
// themselves. They also do not clobber %rax, so we can reuse it when
// adjusting %rsp.
BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr)
.addReg(StackPtr)
.addReg(X86::RAX)
.setMIFlag(MachineInstr::FrameSetup);
}
if (isEAXAlive) {
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),

View File

@ -27,9 +27,11 @@ public:
explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO)
: TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}
static void getStackProbeFunction(const MachineFunction &MF,
const X86Subtarget &STI, unsigned &CallOp,
const char *&Symbol);
/// Emit a call to the target's stack probe function. This is required for all
/// large stack allocations on Windows. The caller is required to materialize
/// the number of bytes to probe in RAX/EAX.
static void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL);
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,

View File

@ -15,6 +15,7 @@
#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
@ -21062,47 +21063,7 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
assert(!Subtarget->isTargetMachO());
// The lowering is pretty easy: we're just emitting the call to _alloca. The
// non-trivial part is impdef of ESP.
if (Subtarget->isTargetWin64()) {
if (Subtarget->isTargetCygMing()) {
// ___chkstk(Mingw64):
// Clobbers R10, R11, RAX and EFLAGS.
// Updates RSP.
BuildMI(*BB, MI, DL, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("___chkstk")
.addReg(X86::RAX, RegState::Implicit)
.addReg(X86::RSP, RegState::Implicit)
.addReg(X86::RAX, RegState::Define | RegState::Implicit)
.addReg(X86::RSP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
} else {
// __chkstk(MSVCRT): does not update stack pointer.
// Clobbers R10, R11 and EFLAGS.
BuildMI(*BB, MI, DL, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("__chkstk")
.addReg(X86::RAX, RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
// RAX has the offset to be subtracted from RSP.
BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
.addReg(X86::RSP)
.addReg(X86::RAX);
}
} else {
const char *StackProbeSymbol = (Subtarget->isTargetKnownWindowsMSVC() ||
Subtarget->isTargetWindowsItanium())
? "_chkstk"
: "_alloca";
BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol(StackProbeSymbol)
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
.addReg(X86::ESP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
}
X86FrameLowering::emitStackProbeCall(*BB->getParent(), *BB, MI, DL);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;

View File

@ -6,6 +6,7 @@ declare x86_stdcallcc void @f(%Foo* inalloca %a)
declare x86_stdcallcc void @i(i32 %a)
define void @g() {
; CHECK-LABEL: _g:
%b = alloca inalloca %Foo
; CHECK: movl $8, %eax
; CHECK: calll __chkstk

View File

@ -1,5 +1,6 @@
; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64
; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32 -code-model=large | FileCheck %s -check-prefix=L64
; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
; PR8777
; PR8778
@ -24,6 +25,13 @@ entry:
; W64: callq __chkstk
; W64: subq %rax, %rsp
; Use %r11 for the large model.
; L64: movq %rsp, %rbp
; L64: $4096, %rax
; L64: movabsq $__chkstk, %r11
; L64: callq *%r11
; L64: subq %rax, %rsp
; Freestanding
; EFI: movq %rsp, %rbp
; EFI: $[[B0OFS:4096|4104]], %rsp
@ -33,8 +41,8 @@ entry:
; M64: leaq 15(%{{.*}}), %rax
; M64: andq $-16, %rax
; M64: callq ___chkstk
; M64-NOT: %rsp
; M64: callq ___chkstk_ms
; M64: subq %rax, %rsp
; M64: movq %rsp, %rax
; W64: leaq 15(%{{.*}}), %rax
@ -43,6 +51,13 @@ entry:
; W64: subq %rax, %rsp
; W64: movq %rsp, %rax
; L64: leaq 15(%{{.*}}), %rax
; L64: andq $-16, %rax
; L64: movabsq $__chkstk, %r11
; L64: callq *%r11
; L64: subq %rax, %rsp
; L64: movq %rsp, %rax
; EFI: leaq 15(%{{.*}}), [[R1:%r.*]]
; EFI: andq $-16, [[R1]]
; EFI: movq %rsp, [[R64:%r.*]]
@ -84,7 +99,8 @@ entry:
; M64: leaq 15(%{{.*}}), %rax
; M64: andq $-16, %rax
; M64: callq ___chkstk
; M64: callq ___chkstk_ms
; M64: subq %rax, %rsp
; M64: movq %rsp, [[R2:%r.*]]
; M64: andq $-128, [[R2]]
; M64: movq [[R2]], %rsp