From a2e0762fae050464494a50c8b5d53ac2f4ba738c Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 24 Mar 2011 07:07:00 +0000 Subject: [PATCH] Target/X86: [PR8777][PR8778] Tweak alloca/chkstk for Windows targets. FIXME: Some cleanups would be needed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128206 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 98 ++++++++++++---------- lib/Target/X86/X86ISelLowering.cpp | 62 ++++++++++---- lib/Target/X86/X86InstrControl.td | 10 +++ test/CodeGen/X86/win64_alloca_dynalloca.ll | 74 ++++++++++++++++ test/CodeGen/X86/win_chkstk.ll | 3 +- 5 files changed, 183 insertions(+), 64 deletions(-) create mode 100644 test/CodeGen/X86/win64_alloca_dynalloca.ll diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 3246c4379ce..071fbe0ceff 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -551,60 +551,66 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // responsible for adjusting the stack pointer. Touching the stack at 4K // increments is necessary to ensure that the guard pages used by the OS // virtual memory manager are allocated in correct sequence. - if (NumBytes >= 4096 && - (STI.isTargetCygMing() || STI.isTargetWin32()) && - !STI.isTargetEnvMacho()) { + if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) { + const char *StackProbeSymbol; + bool isSPUpdateNeeded = false; + + if (Is64Bit) { + if (STI.isTargetCygMing()) + StackProbeSymbol = "___chkstk"; + else { + StackProbeSymbol = "__chkstk"; + isSPUpdateNeeded = true; + } + } else if (STI.isTargetCygMing()) + StackProbeSymbol = "_alloca"; + else + StackProbeSymbol = "_chkstk"; + // Check whether EAX is livein for this function. bool isEAXAlive = isEAXLiveIn(MF); - const char *StackProbeSymbol = - STI.isTargetWindows() ? "_chkstk" : "_alloca"; - if (Is64Bit && STI.isTargetCygMing()) - StackProbeSymbol = "__chkstk"; - unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; - if (!isEAXAlive) { - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes); - BuildMI(MBB, MBBI, DL, TII.get(CallOp)) - .addExternalSymbol(StackProbeSymbol) - .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - } else { + if (isEAXAlive) { + // Sanity check that EAX is not livein for this function. + // It should not be, so throw an assert. + assert(!Is64Bit && "EAX is livein in x64 case!"); + // Save EAX BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) .addReg(X86::EAX, RegState::Kill); - - // Allocate NumBytes-4 bytes on stack. We'll also use 4 already - // allocated bytes for EAX. - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes - 4); - BuildMI(MBB, MBBI, DL, TII.get(CallOp)) - .addExternalSymbol(StackProbeSymbol) - .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - - // Restore EAX - MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), - X86::EAX), - StackPtr, false, NumBytes - 4); - MBB.insert(MBBI, MI); } - } else if (NumBytes >= 4096 && - STI.isTargetWin64() && - !STI.isTargetEnvMacho()) { - // Sanity check that EAX is not livein for this function. It should - // not be, so throw an assert. - assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!"); - // Handle the 64-bit Windows ABI case where we need to call __chkstk. - // Function prologue is responsible for adjusting the stack pointer. - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes); - BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32)) - .addExternalSymbol("__chkstk") - .addReg(StackPtr, RegState::Define | RegState::Implicit); - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, - TII, *RegInfo); + if (Is64Bit) { + // Handle the 64-bit Windows ABI case where we need to call __chkstk. + // Function prologue is responsible for adjusting the stack pointer. + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) + .addImm(NumBytes); + } else { + // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. + // We'll also use 4 already allocated bytes for EAX. + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(isEAXAlive ? NumBytes - 4 : NumBytes); + } + + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32)) + .addExternalSymbol(StackProbeSymbol) + .addReg(StackPtr, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + + // MSVC x64's __chkstk needs to adjust %rsp. + // FIXME: %rax preserves the offset and should be available. + if (isSPUpdateNeeded) + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, + TII, *RegInfo); + + if (isEAXAlive) { + // Restore EAX + MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), + X86::EAX), + StackPtr, false, NumBytes - 4); + MBB.insert(MBBI, MI); + } } else if (NumBytes) emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII, *RegInfo); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 58acf4faac2..fdf8b9ad9d9 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -550,12 +550,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - if (Subtarget->is64Bit()) - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); - else - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, + (Subtarget->is64Bit() ? MVT::i64 : MVT::i32), + (Subtarget->isTargetCOFF() + && !Subtarget->isTargetEnvMacho() + ? Custom : Expand)); if (!UseSoftFloat && X86ScalarSSEf64) { // f32 and f64 use SSE. @@ -7929,6 +7928,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) && "This should be used only on Windows targets"); + assert(!Subtarget->isTargetEnvMacho()); DebugLoc dl = Op.getDebugLoc(); // Get the inputs. @@ -7939,8 +7939,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Flag; EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; + unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX); - Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag); + Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag); Flag = Chain.getValue(1); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -10412,21 +10413,48 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); + assert(!Subtarget->isTargetEnvMacho()); + // The lowering is pretty easy: we're just emitting the call to _alloca. The // non-trivial part is impdef of ESP. - // FIXME: The code should be tweaked as soon as we'll try to do codegen for - // mingw-w64. - const char *StackProbeSymbol = + if (Subtarget->isTargetWin64()) { + if (Subtarget->isTargetCygMing()) { + // ___chkstk(Mingw64): + // Clobbers R10, R11, RAX and EFLAGS. + // Updates RSP. + BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA)) + .addExternalSymbol("___chkstk") + .addReg(X86::RAX, RegState::Implicit) + .addReg(X86::RSP, RegState::Implicit) + .addReg(X86::RAX, RegState::Define | RegState::Implicit) + .addReg(X86::RSP, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + } else { + // __chkstk(MSVCRT): does not update stack pointer. + // Clobbers R10, R11 and EFLAGS. + // FIXME: RAX(allocated size) might be reused and not killed. + BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA)) + .addExternalSymbol("__chkstk") + .addReg(X86::RAX, RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + // RAX has the offset to subtracted from RSP. + BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP) + .addReg(X86::RSP) + .addReg(X86::RAX); + } + } else { + const char *StackProbeSymbol = Subtarget->isTargetWindows() ? "_chkstk" : "_alloca"; - BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32)) - .addExternalSymbol(StackProbeSymbol) - .addReg(X86::EAX, RegState::Implicit) - .addReg(X86::ESP, RegState::Implicit) - .addReg(X86::EAX, RegState::Define | RegState::Implicit) - .addReg(X86::ESP, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32)) + .addExternalSymbol(StackProbeSymbol) + .addReg(X86::EAX, RegState::Implicit) + .addReg(X86::ESP, RegState::Implicit) + .addReg(X86::EAX, RegState::Define | RegState::Implicit) + .addReg(X86::ESP, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + } MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index 77f47250e9f..c228a0aed59 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -263,6 +263,16 @@ let isCall = 1, isCodeGenOnly = 1 in Requires<[IsWin64]>; } +let isCall = 1, isCodeGenOnly = 1 in + // __chkstk(MSVC): clobber R10, R11 and EFLAGS. + // ___chkstk(Mingw64): clobber R10, R11, RAX and EFLAGS, and update RSP. + let Defs = [RAX, R10, R11, RSP, EFLAGS], + Uses = [RSP] in { + def W64ALLOCA : Ii32PCRel<0xE8, RawFrm, + (outs), (ins i64i32imm_pcrel:$dst, variable_ops), + "call{q}\t$dst", []>, + Requires<[IsWin64]>; + } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, isCodeGenOnly = 1 in diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll new file mode 100644 index 00000000000..87d632afa64 --- /dev/null +++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll @@ -0,0 +1,74 @@ +; RUN: llc < %s -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64 +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64 +; RUN: llc < %s -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI +; PR8777 +; PR8778 + +define i64 @foo(i64 %n, i64 %x) nounwind { +entry: + + %buf0 = alloca i8, i64 4096, align 1 + +; ___chkstk must adjust %rsp. +; M64: movq %rsp, %rbp +; M64: $4096, %rax +; M64: callq ___chkstk +; M64-NOT: %rsp + +; __chkstk does not adjust %rsp. +; W64: movq %rsp, %rbp +; W64: $4096, %rax +; W64: callq __chkstk +; W64: subq $4096, %rsp + +; Freestanding +; EFI: movq %rsp, %rbp +; EFI: $[[B0OFS:4096|4104]], %rsp +; EFI-NOT: call + + %buf1 = alloca i8, i64 %n, align 1 + +; M64: leaq 15(%rcx), %rax +; M64: andq $-16, %rax +; M64: callq ___chkstk +; M64-NOT: %rsp +; M64: movq %rsp, %rax + +; W64: leaq 15(%rcx), %rax +; W64: andq $-16, %rax +; W64: callq __chkstk +; W64: subq %rax, %rsp +; W64: movq %rsp, %rax + +; EFI: leaq 15(%rcx), %rax +; EFI: andq $-16, %rax +; EFI: movq %rsp, [[R64:%r..]] +; EFI: subq %rax, [[R64]] +; EFI: movq [[R64]], %rsp + + %r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind + +; M64: subq $48, %rsp +; M64: leaq -4096(%rbp), %r9 +; M64: movq %rax, 32(%rsp) +; M64: callq bar + +; W64: subq $48, %rsp +; W64: leaq -4096(%rbp), %r9 +; W64: movq %rax, 32(%rsp) +; W64: callq bar + +; EFI: subq $48, %rsp +; EFI: leaq -[[B0OFS]](%rbp), %r9 +; EFI: movq [[R64]], 32(%rsp) +; EFI: callq _bar + + ret i64 %r + +; M64: movq %rbp, %rsp + +; W64: movq %rbp, %rsp + +} + +declare i64 @bar(i64, i64, i64, i8* nocapture, i8* nocapture) nounwind diff --git a/test/CodeGen/X86/win_chkstk.ll b/test/CodeGen/X86/win_chkstk.ll index 82ce81d4ae7..e4e4483ff94 100644 --- a/test/CodeGen/X86/win_chkstk.ll +++ b/test/CodeGen/X86/win_chkstk.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32 ; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64 ; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX +; RUN: llc < %s -mtriple=x86_64-pc-win32-macho | FileCheck %s -check-prefix=LINUX ; Windows and mingw require a prologue helper routine if more than 4096 bytes area ; allocated on the stack. Windows uses __chkstk and mingw uses __alloca. __alloca @@ -16,7 +17,7 @@ entry: ; WIN_X32: calll __chkstk ; WIN_X64: callq __chkstk ; MINGW_X32: calll __alloca -; MINGW_X64: callq __chkstk +; MINGW_X64: callq ___chkstk ; LINUX-NOT: call __chkstk %array4096 = alloca [4096 x i8], align 16 ; <[4096 x i8]*> [#uses=0] ret i32 0