Target/X86: [PR8777][PR8778] Tweak alloca/chkstk for Windows targets.

FIXME: Some cleanups would be needed.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128206 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
NAKAMURA Takumi 2011-03-24 07:07:00 +00:00
parent c37c539944
commit a2e0762fae
5 changed files with 183 additions and 64 deletions

View File

@ -551,60 +551,66 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// responsible for adjusting the stack pointer. Touching the stack at 4K
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
if (NumBytes >= 4096 &&
(STI.isTargetCygMing() || STI.isTargetWin32()) &&
!STI.isTargetEnvMacho()) {
if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) {
const char *StackProbeSymbol;
bool isSPUpdateNeeded = false;
if (Is64Bit) {
if (STI.isTargetCygMing())
StackProbeSymbol = "___chkstk";
else {
StackProbeSymbol = "__chkstk";
isSPUpdateNeeded = true;
}
} else if (STI.isTargetCygMing())
StackProbeSymbol = "_alloca";
else
StackProbeSymbol = "_chkstk";
// Check whether EAX is livein for this function.
bool isEAXAlive = isEAXLiveIn(MF);
const char *StackProbeSymbol =
STI.isTargetWindows() ? "_chkstk" : "_alloca";
if (Is64Bit && STI.isTargetCygMing())
StackProbeSymbol = "__chkstk";
unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
if (!isEAXAlive) {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes);
BuildMI(MBB, MBBI, DL, TII.get(CallOp))
.addExternalSymbol(StackProbeSymbol)
.addReg(StackPtr, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
} else {
if (isEAXAlive) {
// Sanity check that EAX is not livein for this function.
// It should not be, so throw an assert.
assert(!Is64Bit && "EAX is livein in x64 case!");
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
.addReg(X86::EAX, RegState::Kill);
// Allocate NumBytes-4 bytes on stack. We'll also use 4 already
// allocated bytes for EAX.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes - 4);
BuildMI(MBB, MBBI, DL, TII.get(CallOp))
.addExternalSymbol(StackProbeSymbol)
.addReg(StackPtr, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
X86::EAX),
StackPtr, false, NumBytes - 4);
MBB.insert(MBBI, MI);
}
} else if (NumBytes >= 4096 &&
STI.isTargetWin64() &&
!STI.isTargetEnvMacho()) {
// Sanity check that EAX is not livein for this function. It should
// not be, so throw an assert.
assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");
// Handle the 64-bit Windows ABI case where we need to call __chkstk.
// Function prologue is responsible for adjusting the stack pointer.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes);
BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32))
.addExternalSymbol("__chkstk")
.addReg(StackPtr, RegState::Define | RegState::Implicit);
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
TII, *RegInfo);
if (Is64Bit) {
// Handle the 64-bit Windows ABI case where we need to call __chkstk.
// Function prologue is responsible for adjusting the stack pointer.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
.addImm(NumBytes);
} else {
// Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
// We'll also use 4 already allocated bytes for EAX.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(isEAXAlive ? NumBytes - 4 : NumBytes);
}
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
.addExternalSymbol(StackProbeSymbol)
.addReg(StackPtr, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
// MSVC x64's __chkstk does not adjust %rsp itself, so it must be done here.
// FIXME: %rax preserves the offset and should be available.
if (isSPUpdateNeeded)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
TII, *RegInfo);
if (isEAXAlive) {
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
X86::EAX),
StackPtr, false, NumBytes - 4);
MBB.insert(MBBI, MI);
}
} else if (NumBytes)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
TII, *RegInfo);

View File

@ -550,12 +550,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC,
(Subtarget->is64Bit() ? MVT::i64 : MVT::i32),
(Subtarget->isTargetCOFF()
&& !Subtarget->isTargetEnvMacho()
? Custom : Expand));
if (!UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
@ -7929,6 +7928,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
"This should be used only on Windows targets");
assert(!Subtarget->isTargetEnvMacho());
DebugLoc dl = Op.getDebugLoc();
// Get the inputs.
@ -7939,8 +7939,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Flag;
EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
Flag = Chain.getValue(1);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@ -10412,21 +10413,48 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
assert(!Subtarget->isTargetEnvMacho());
// The lowering is pretty easy: we're just emitting the call to _alloca. The
// non-trivial part is impdef of ESP.
// FIXME: The code should be tweaked as soon as we'll try to do codegen for
// mingw-w64.
const char *StackProbeSymbol =
if (Subtarget->isTargetWin64()) {
if (Subtarget->isTargetCygMing()) {
// ___chkstk(Mingw64):
// Clobbers R10, R11, RAX and EFLAGS.
// Updates RSP.
BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
.addExternalSymbol("___chkstk")
.addReg(X86::RAX, RegState::Implicit)
.addReg(X86::RSP, RegState::Implicit)
.addReg(X86::RAX, RegState::Define | RegState::Implicit)
.addReg(X86::RSP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
} else {
// __chkstk(MSVCRT): does not update stack pointer.
// Clobbers R10, R11 and EFLAGS.
// FIXME: RAX(allocated size) might be reused and not killed.
BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
.addExternalSymbol("__chkstk")
.addReg(X86::RAX, RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
// RAX has the offset to be subtracted from RSP.
BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
.addReg(X86::RSP)
.addReg(X86::RAX);
}
} else {
const char *StackProbeSymbol =
Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol(StackProbeSymbol)
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
.addReg(X86::ESP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol(StackProbeSymbol)
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
.addReg(X86::ESP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
}
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;

View File

@ -263,6 +263,16 @@ let isCall = 1, isCodeGenOnly = 1 in
Requires<[IsWin64]>;
}
// W64ALLOCA: call instruction used to invoke the Win64 stack-probe helpers.
let isCall = 1, isCodeGenOnly = 1 in
// __chkstk(MSVC): clobber R10, R11 and EFLAGS.
// ___chkstk(Mingw64): clobber R10, R11, RAX and EFLAGS, and update RSP.
// The Defs list below covers the registers named for both helpers, so the
// same opcode can be used with either symbol.
let Defs = [RAX, R10, R11, RSP, EFLAGS],
Uses = [RSP] in {
// Emitted as a pc-relative call (opcode 0xE8, printed as "call{q}"); it is
// only available when the IsWin64 predicate holds.
def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst, variable_ops),
"call{q}\t$dst", []>,
Requires<[IsWin64]>;
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1 in

View File

@ -0,0 +1,74 @@
; RUN: llc < %s -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
; RUN: llc < %s -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
; PR8777
; PR8778
; @foo performs a fixed 4096-byte alloca followed by a variable-sized alloca,
; then passes both buffers to @bar. Each step is followed by the expected
; output for each of the three check prefixes.
define i64 @foo(i64 %n, i64 %x) nounwind {
entry:
%buf0 = alloca i8, i64 4096, align 1
; ___chkstk must adjust %rsp.
; M64: movq %rsp, %rbp
; M64: $4096, %rax
; M64: callq ___chkstk
; M64-NOT: %rsp
; __chkstk does not adjust %rsp.
; W64: movq %rsp, %rbp
; W64: $4096, %rax
; W64: callq __chkstk
; W64: subq $4096, %rsp
; Freestanding
; EFI: movq %rsp, %rbp
; EFI: $[[B0OFS:4096|4104]], %rsp
; EFI-NOT: call
; Variable-sized allocation: the size is rounded up to a multiple of 16
; before being handed to the probe helper or subtracted directly.
%buf1 = alloca i8, i64 %n, align 1
; M64: leaq 15(%rcx), %rax
; M64: andq $-16, %rax
; M64: callq ___chkstk
; M64-NOT: %rsp
; M64: movq %rsp, %rax
; W64: leaq 15(%rcx), %rax
; W64: andq $-16, %rax
; W64: callq __chkstk
; W64: subq %rax, %rsp
; W64: movq %rsp, %rax
; EFI: leaq 15(%rcx), %rax
; EFI: andq $-16, %rax
; EFI: movq %rsp, [[R64:%r..]]
; EFI: subq %rax, [[R64]]
; EFI: movq [[R64]], %rsp
; Both buffers are passed to @bar: the checks expect %buf0 addressed relative
; to %rbp and the second buffer's address stored at 32(%rsp).
%r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind
; M64: subq $48, %rsp
; M64: leaq -4096(%rbp), %r9
; M64: movq %rax, 32(%rsp)
; M64: callq bar
; W64: subq $48, %rsp
; W64: leaq -4096(%rbp), %r9
; W64: movq %rax, 32(%rsp)
; W64: callq bar
; EFI: subq $48, %rsp
; EFI: leaq -[[B0OFS]](%rbp), %r9
; EFI: movq [[R64]], 32(%rsp)
; EFI: callq _bar
ret i64 %r
; M64: movq %rbp, %rsp
; W64: movq %rbp, %rsp
}
declare i64 @bar(i64, i64, i64, i8* nocapture, i8* nocapture) nounwind

View File

@ -3,6 +3,7 @@
; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64
; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
; RUN: llc < %s -mtriple=x86_64-pc-win32-macho | FileCheck %s -check-prefix=LINUX
; Windows and mingw require a prologue helper routine if more than 4096 bytes are
; allocated on the stack. Windows uses __chkstk and mingw uses __alloca. __alloca
@ -16,7 +17,7 @@ entry:
; WIN_X32: calll __chkstk
; WIN_X64: callq __chkstk
; MINGW_X32: calll __alloca
; MINGW_X64: callq __chkstk
; MINGW_X64: callq ___chkstk
; LINUX-NOT: call __chkstk
%array4096 = alloca [4096 x i8], align 16 ; <[4096 x i8]*> [#uses=0]
ret i32 0