Target/X86: [PR8777][PR8778] Tweak alloca/chkstk for Windows targets.

FIXME: Some cleanups are still needed.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128206 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
NAKAMURA Takumi 2011-03-24 07:07:00 +00:00
parent c37c539944
commit a2e0762fae
5 changed files with 183 additions and 64 deletions

View File

@ -551,60 +551,66 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// responsible for adjusting the stack pointer. Touching the stack at 4K // responsible for adjusting the stack pointer. Touching the stack at 4K
// increments is necessary to ensure that the guard pages used by the OS // increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence. // virtual memory manager are allocated in correct sequence.
if (NumBytes >= 4096 && if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) {
(STI.isTargetCygMing() || STI.isTargetWin32()) && const char *StackProbeSymbol;
!STI.isTargetEnvMacho()) { bool isSPUpdateNeeded = false;
if (Is64Bit) {
if (STI.isTargetCygMing())
StackProbeSymbol = "___chkstk";
else {
StackProbeSymbol = "__chkstk";
isSPUpdateNeeded = true;
}
} else if (STI.isTargetCygMing())
StackProbeSymbol = "_alloca";
else
StackProbeSymbol = "_chkstk";
// Check whether EAX is livein for this function. // Check whether EAX is livein for this function.
bool isEAXAlive = isEAXLiveIn(MF); bool isEAXAlive = isEAXLiveIn(MF);
const char *StackProbeSymbol = if (isEAXAlive) {
STI.isTargetWindows() ? "_chkstk" : "_alloca"; // Sanity check that EAX is not livein for this function.
if (Is64Bit && STI.isTargetCygMing()) // It should not be, so throw an assert.
StackProbeSymbol = "__chkstk"; assert(!Is64Bit && "EAX is livein in x64 case!");
unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
if (!isEAXAlive) {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes);
BuildMI(MBB, MBBI, DL, TII.get(CallOp))
.addExternalSymbol(StackProbeSymbol)
.addReg(StackPtr, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
} else {
// Save EAX // Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
.addReg(X86::EAX, RegState::Kill); .addReg(X86::EAX, RegState::Kill);
// Allocate NumBytes-4 bytes on stack. We'll also use 4 already
// allocated bytes for EAX.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes - 4);
BuildMI(MBB, MBBI, DL, TII.get(CallOp))
.addExternalSymbol(StackProbeSymbol)
.addReg(StackPtr, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
X86::EAX),
StackPtr, false, NumBytes - 4);
MBB.insert(MBBI, MI);
} }
} else if (NumBytes >= 4096 &&
STI.isTargetWin64() &&
!STI.isTargetEnvMacho()) {
// Sanity check that EAX is not livein for this function. It should
// not be, so throw an assert.
assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");
// Handle the 64-bit Windows ABI case where we need to call __chkstk. if (Is64Bit) {
// Function prologue is responsible for adjusting the stack pointer. // Handle the 64-bit Windows ABI case where we need to call __chkstk.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) // Function prologue is responsible for adjusting the stack pointer.
.addImm(NumBytes); BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32)) .addImm(NumBytes);
.addExternalSymbol("__chkstk") } else {
.addReg(StackPtr, RegState::Define | RegState::Implicit); // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, // We'll also use 4 already allocated bytes for EAX.
TII, *RegInfo); BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(isEAXAlive ? NumBytes - 4 : NumBytes);
}
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
.addExternalSymbol(StackProbeSymbol)
.addReg(StackPtr, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
// MSVC x64's __chkstk needs to adjust %rsp.
// FIXME: %rax preserves the offset and should be available.
if (isSPUpdateNeeded)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
TII, *RegInfo);
if (isEAXAlive) {
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
X86::EAX),
StackPtr, false, NumBytes - 4);
MBB.insert(MBBI, MI);
}
} else if (NumBytes) } else if (NumBytes)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
TII, *RegInfo); TII, *RegInfo);

View File

@ -550,12 +550,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->is64Bit()) setOperationAction(ISD::DYNAMIC_STACKALLOC,
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); (Subtarget->is64Bit() ? MVT::i64 : MVT::i32),
if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) (Subtarget->isTargetCOFF()
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); && !Subtarget->isTargetEnvMacho()
else ? Custom : Expand));
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
if (!UseSoftFloat && X86ScalarSSEf64) { if (!UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE. // f32 and f64 use SSE.
@ -7929,6 +7928,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const { SelectionDAG &DAG) const {
assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) && assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
"This should be used only on Windows targets"); "This should be used only on Windows targets");
assert(!Subtarget->isTargetEnvMacho());
DebugLoc dl = Op.getDebugLoc(); DebugLoc dl = Op.getDebugLoc();
// Get the inputs. // Get the inputs.
@ -7939,8 +7939,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Flag; SDValue Flag;
EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag); Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
Flag = Chain.getValue(1); Flag = Chain.getValue(1);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@ -10412,21 +10413,48 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc(); DebugLoc DL = MI->getDebugLoc();
assert(!Subtarget->isTargetEnvMacho());
// The lowering is pretty easy: we're just emitting the call to _alloca. The // The lowering is pretty easy: we're just emitting the call to _alloca. The
// non-trivial part is impdef of ESP. // non-trivial part is impdef of ESP.
// FIXME: The code should be tweaked as soon as we'll try to do codegen for
// mingw-w64.
const char *StackProbeSymbol = if (Subtarget->isTargetWin64()) {
if (Subtarget->isTargetCygMing()) {
// ___chkstk(Mingw64):
// Clobbers R10, R11, RAX and EFLAGS.
// Updates RSP.
BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
.addExternalSymbol("___chkstk")
.addReg(X86::RAX, RegState::Implicit)
.addReg(X86::RSP, RegState::Implicit)
.addReg(X86::RAX, RegState::Define | RegState::Implicit)
.addReg(X86::RSP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
} else {
// __chkstk(MSVCRT): does not update stack pointer.
// Clobbers R10, R11 and EFLAGS.
// FIXME: RAX(allocated size) might be reused and not killed.
BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
.addExternalSymbol("__chkstk")
.addReg(X86::RAX, RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
// RAX has the offset to be subtracted from RSP.
BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
.addReg(X86::RSP)
.addReg(X86::RAX);
}
} else {
const char *StackProbeSymbol =
Subtarget->isTargetWindows() ? "_chkstk" : "_alloca"; Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32)) BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol(StackProbeSymbol) .addExternalSymbol(StackProbeSymbol)
.addReg(X86::EAX, RegState::Implicit) .addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit) .addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit) .addReg(X86::EAX, RegState::Define | RegState::Implicit)
.addReg(X86::ESP, RegState::Define | RegState::Implicit) .addReg(X86::ESP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
}
MI->eraseFromParent(); // The pseudo instruction is gone now. MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB; return BB;

View File

@ -263,6 +263,16 @@ let isCall = 1, isCodeGenOnly = 1 in
Requires<[IsWin64]>; Requires<[IsWin64]>;
} }
// W64ALLOCA: a pc-relative call (opcode 0xE8, printed as "call{q}") that is
// only selectable when the IsWin64 predicate holds. It is marked
// isCodeGenOnly and takes variable_ops so extra implicit register operands
// can be attached where the instruction is built.
let isCall = 1, isCodeGenOnly = 1 in
// The callee is one of two helpers with different side effects:
//   __chkstk (MSVC):     clobbers R10, R11 and EFLAGS.
//   ___chkstk (Mingw64): clobbers R10, R11, RAX and EFLAGS, and updates RSP.
// Defs below lists every register named for either helper.
let Defs = [RAX, R10, R11, RSP, EFLAGS],
Uses = [RSP] in {
def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst, variable_ops),
"call{q}\t$dst", []>,
Requires<[IsWin64]>;
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1 in isCodeGenOnly = 1 in

View File

@ -0,0 +1,74 @@
; Checks the code generated for a fixed 4096-byte alloca and a variable-sized
; alloca on three x86-64 triples: mingw32 (helper ___chkstk), win32 (helper
; __chkstk followed by an explicit %rsp adjustment) and win32-macho (no helper
; call expected).
; RUN: llc < %s -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
; RUN: llc < %s -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
; PR8777
; PR8778
define i64 @foo(i64 %n, i64 %x) nounwind {
entry:
; Fixed-size buffer of 4096 bytes.
%buf0 = alloca i8, i64 4096, align 1
; ___chkstk must adjust %rsp.
; M64: movq %rsp, %rbp
; M64: $4096, %rax
; M64: callq ___chkstk
; M64-NOT: %rsp
; __chkstk does not adjust %rsp.
; W64: movq %rsp, %rbp
; W64: $4096, %rax
; W64: callq __chkstk
; W64: subq $4096, %rsp
; Freestanding
; EFI: movq %rsp, %rbp
; EFI: $[[B0OFS:4096|4104]], %rsp
; EFI-NOT: call
; Variable-sized buffer of %n bytes; the checks expect the size to be rounded
; up to a multiple of 16 in %rax before the allocation.
%buf1 = alloca i8, i64 %n, align 1
; M64: leaq 15(%rcx), %rax
; M64: andq $-16, %rax
; M64: callq ___chkstk
; M64-NOT: %rsp
; M64: movq %rsp, %rax
; W64: leaq 15(%rcx), %rax
; W64: andq $-16, %rax
; W64: callq __chkstk
; W64: subq %rax, %rsp
; W64: movq %rsp, %rax
; EFI: leaq 15(%rcx), %rax
; EFI: andq $-16, %rax
; EFI: movq %rsp, [[R64:%r..]]
; EFI: subq %rax, [[R64]]
; EFI: movq [[R64]], %rsp
; Both buffers are passed to @bar; the fifth argument is expected to be stored
; at 32(%rsp).
%r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind
; M64: subq $48, %rsp
; M64: leaq -4096(%rbp), %r9
; M64: movq %rax, 32(%rsp)
; M64: callq bar
; W64: subq $48, %rsp
; W64: leaq -4096(%rbp), %r9
; W64: movq %rax, 32(%rsp)
; W64: callq bar
; EFI: subq $48, %rsp
; EFI: leaq -[[B0OFS]](%rbp), %r9
; EFI: movq [[R64]], 32(%rsp)
; EFI: callq _bar
ret i64 %r
; The stack pointer is expected to be restored from %rbp on return.
; M64: movq %rbp, %rsp
; W64: movq %rbp, %rsp
}
declare i64 @bar(i64, i64, i64, i8* nocapture, i8* nocapture) nounwind

View File

@ -3,6 +3,7 @@
; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32 ; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64 ; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64
; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX ; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
; RUN: llc < %s -mtriple=x86_64-pc-win32-macho | FileCheck %s -check-prefix=LINUX
; Windows and mingw require a prologue helper routine if more than 4096 bytes are ; Windows and mingw require a prologue helper routine if more than 4096 bytes are
; allocated on the stack. Windows uses __chkstk and mingw uses __alloca. __alloca ; allocated on the stack. Windows uses __chkstk and mingw uses __alloca. __alloca
@ -16,7 +17,7 @@ entry:
; WIN_X32: calll __chkstk ; WIN_X32: calll __chkstk
; WIN_X64: callq __chkstk ; WIN_X64: callq __chkstk
; MINGW_X32: calll __alloca ; MINGW_X32: calll __alloca
; MINGW_X64: callq __chkstk ; MINGW_X64: callq ___chkstk
; LINUX-NOT: call __chkstk ; LINUX-NOT: call __chkstk
%array4096 = alloca [4096 x i8], align 16 ; <[4096 x i8]*> [#uses=0] %array4096 = alloca [4096 x i8], align 16 ; <[4096 x i8]*> [#uses=0]
ret i32 0 ret i32 0