diff --git a/docs/Extensions.rst b/docs/Extensions.rst index 7d8c521ec44..9dddab5b5ed 100644 --- a/docs/Extensions.rst +++ b/docs/Extensions.rst @@ -159,3 +159,34 @@ different COMDATs: .globl Symbol2 Symbol2: .long 1 + +Target Specific Behaviour +========================= + +Windows on ARM +-------------- + +Stack Probe Emission +^^^^^^^^^^^^^^^^^^^^ + +The reference implementation (Microsoft Visual Studio 2012) emits stack probes +in the following fashion: + +.. code-block:: gas + + movw r4, #constant + bl __chkstk + sub.w sp, sp, r4 + +However, this has the limitation of 32 MiB (±16MiB). In order to accomodate +larger binaries, LLVM supports the use of ``-mcode-model=large`` to allow a 4GiB +range via a slight deviation. It will generate an indirect jump as follows: + +.. code-block:: gas + + movw r4, #constant + movw r12, :lower16:__chkstk + movt r12, :upper16:__chkstk + blx r12 + sub.w sp, sp, r4 + diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index aae7aa45e42..af4c97db684 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" @@ -142,6 +143,14 @@ static int sizeOfSPAdjustment(const MachineInstr *MI) { return count; } +static bool WindowsRequiresStackProbe(const MachineFunction &MF, + size_t StackSizeInBytes) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + if (MFI->getStackProtectorIndex() > 0) + return StackSizeInBytes >= 4080; + return StackSizeInBytes >= 4096; +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -149,15 +158,16 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { ARMFunctionInfo *AFI = MF.getInfo(); MachineModuleInfo &MMI = MF.getMMI(); MCContext &Context = MMI.getContext(); + const TargetMachine &TM = MF.getTarget(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); const ARMBaseRegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); + static_cast(TM.getRegisterInfo()); const ARMBaseInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); + *static_cast(TM.getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = TM.getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector &CSI = MFI->getCalleeSavedInfo(); @@ -187,7 +197,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { .addCFIIndex(CFIIndex); } - if (!AFI->hasStackFrame()) { + if (!AFI->hasStackFrame() && + (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) { if (NumBytes - ArgRegsSaveSize != 0) { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); @@ -284,6 +295,50 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } else NumBytes = DPRCSOffset; + if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) { + uint32_t NumWords = NumBytes >> 2; + + if (NumWords < 65536) + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) + .addImm(NumWords)); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4) + .addImm(NumWords); + + switch (TM.getCodeModel()) { + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Default: + case CodeModel::Kernel: + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit); + break; + case CodeModel::Large: + case CodeModel::JITDefault: { + LLVMContext &Ctx = MF.getMMI().getModule()->getContext(); + const GlobalValue *F = + Function::Create(FunctionType::get(Type::getVoidTy(Ctx), false), + GlobalValue::AvailableExternallyLinkage, "__chkstk"); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12) + .addGlobalAddress(F); + BuildMI(MBB, MBBI, dl, TII.get(ARM::BLX)) + .addReg(ARM::R12, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit); + break; + } + } + + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), + ARM::SP) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::R4, RegState::Kill) + .setMIFlags(MachineInstr::FrameSetup))); + NumBytes = 0; + } + unsigned adjustedGPRCS1Size = GPRCS1Size; if (NumBytes) { // Adjust SP after all the callee-save spills. diff --git a/test/CodeGen/ARM/Windows/chkstk.ll b/test/CodeGen/ARM/Windows/chkstk.ll new file mode 100644 index 00000000000..9c58fa08d37 --- /dev/null +++ b/test/CodeGen/ARM/Windows/chkstk.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=thumbv7-windows -mcpu=cortex-a9 %s -o - \ +; RUN: | FileCheck -check-prefix CHECK-DEFAULT-CODE-MODEL %s + +; RUN: llc -mtriple=thumbv7-windows -mcpu=cortex-a9 -code-model=large %s -o - \ +; RUN: | FileCheck -check-prefix CHECK-LARGE-CODE-MODEL %s + +define arm_aapcs_vfpcc void @check_watermark() { +entry: + %buffer = alloca [4096 x i8], align 1 + ret void +} + +; CHECK-DEFAULT-CODE-MODEL: check_watermark: +; CHECK-DEFAULT-CODE-MODEL: movw r4, #1024 +; CHECK-DEFAULT-CODE-MODEL: bl __chkstk +; CHECK-DEFAULT-CODE-MODEL: sub.w sp, sp, r4 + +; CHECK-LARGE-CODE-MODEL: check_watermark: +; CHECK-LARGE-CODE-MODEL: movw r4, #1024 +; CHECK-LARGE-CODE-MODEL: movw r12, :lower16:__chkstk +; CHECK-LARGE-CODE-MODEL: movt r12, :upper16:__chkstk +; CHECK-LARGE-CODE-MODEL: blx r12 +; CHECK-LARGE-CODE-MODEL: sub.w sp, sp, r4 +