diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 338ed4cdae4..b6960695625 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -355,5 +355,8 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
   case MCCFIInstruction::OpWindowSave:
     OutStreamer.EmitCFIWindowSave();
     break;
+  case MCCFIInstruction::OpSameValue:
+    OutStreamer.EmitCFISameValue(Inst.getRegister());
+    break;
   }
 }
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index dcae66bc858..c685d3e4c34 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -14,6 +14,7 @@
 #include "ARMFrameLowering.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMBaseRegisterInfo.h"
+#include "ARMConstantPoolValue.h"
 #include "ARMMachineFunctionInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -1612,3 +1613,366 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 
   MBB.erase(I);
 }
+/// Get the minimum constant for ARM that is greater than or equal to the
+/// argument. In ARM, constants can have any value that can be produced by
+/// rotating an 8-bit value to the right by an even number of bits within a
+/// 32-bit word.
+static uint32_t alignToARMConstant(uint32_t Value) {
+  unsigned Shifted = 0;
+
+  if (Value == 0)
+    return 0;
+
+  while (!(Value & 0xC0000000)) {
+    Value = Value << 2;
+    Shifted += 2;
+  }
+
+  bool Carry = (Value & 0x00FFFFFF);
+  Value = ((Value & 0xFF000000) >> 24) + Carry;
+
+  if (Value & 0x0000100)
+    Value = Value & 0x000001FC;
+
+  if (Shifted > 24)
+    Value = Value >> (Shifted - 24);
+  else
+    Value = Value << (24 - Shifted);
+
+  return Value;
+}
+
+// The stack limit in the TCB is set to this many bytes above the actual
+// stack limit.
+static const uint64_t kSplitStackAvailable = 256;
+
+// Adjust the function prologue to enable split stacks. This currently only
+// supports android and linux.
+//
+// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
+// must be well defined in order to allow for consistent implementations of the
+// __morestack helper function. The ABI is also not a normal ABI in that it
+// doesn't follow the normal calling conventions because this allows the
+// prologue of each function to be optimized further.
+//
+// Currently, the ABI looks like (when calling __morestack)
+//
+//   * r4 holds the minimum stack size requested for this function call
+//   * r5 holds the stack size of the arguments to the function
+//   * the beginning of the function is 3 instructions after the call to
+//     __morestack
+//
+// Implementations of __morestack should use r4 to allocate a new stack, r5 to
+// place the arguments on to the new stack, and the 3-instruction knowledge to
+// jump directly to the body of the function when working on the new stack.
+//
+// An old (and possibly no longer compatible) implementation of __morestack for
+// ARM can be found at [1].
+//
+// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
+void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
+  unsigned Opcode;
+  unsigned CFIIndex;
+  const ARMSubtarget *ST = &MF.getTarget().getSubtarget<ARMSubtarget>();
+  bool Thumb = ST->isThumb();
+
+  // Sadly, this currently doesn't support varargs or platforms other than
+  // android/linux. Note that thumb1/thumb2 are supported for android/linux.
+  if (MF.getFunction()->isVarArg())
+    report_fatal_error("Segmented stacks do not support vararg functions.");
+  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
+    report_fatal_error("Segmented stacks not supported on this platform.");
+
+  MachineBasicBlock &prologueMBB = MF.front();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineModuleInfo &MMI = MF.getMMI();
+  MCContext &Context = MMI.getContext();
+  const MCRegisterInfo *MRI = Context.getRegisterInfo();
+  const ARMBaseInstrInfo &TII =
+      *static_cast<const ARMBaseInstrInfo *>(MF.getTarget().getInstrInfo());
+  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
+  DebugLoc DL;
+
+  // Use R4 and R5 as scratch registers.
+  // We save R4 and R5 before use and restore them before leaving the function.
+  unsigned ScratchReg0 = ARM::R4;
+  unsigned ScratchReg1 = ARM::R5;
+  uint64_t AlignedStackSize;
+
+  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
+  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
+  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
+  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
+  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
+
+  for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
+                                          e = prologueMBB.livein_end();
+       i != e; ++i) {
+    AllocMBB->addLiveIn(*i);
+    GetMBB->addLiveIn(*i);
+    McrMBB->addLiveIn(*i);
+    PrevStackMBB->addLiveIn(*i);
+    PostStackMBB->addLiveIn(*i);
+  }
+
+  MF.push_front(PostStackMBB);
+  MF.push_front(AllocMBB);
+  MF.push_front(GetMBB);
+  MF.push_front(McrMBB);
+  MF.push_front(PrevStackMBB);
+
+  // The required stack size, aligned to the ARM constant criterion.
+  uint64_t StackSize = MFI->getStackSize();
+
+  AlignedStackSize = alignToARMConstant(StackSize);
+
+  // When the frame size is less than 256 we just compare the stack
+  // boundary directly to the value of the stack pointer, per gcc.
+  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
+
+  // We will use two of the callee-saved registers as scratch registers so we
+  // need to save those registers onto the stack.
+  // We will use SR0 to hold the stack limit and SR1 to hold the stack size
+  // requested and the arguments for __morestack().
+  // SR0: Scratch Register #0
+  // SR1: Scratch Register #1
+  // push {SR0, SR1}
+  if (Thumb) {
+    AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH)))
+        .addReg(ScratchReg0).addReg(ScratchReg1);
+  } else {
+    AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
+                       .addReg(ARM::SP, RegState::Define).addReg(ARM::SP))
+        .addReg(ScratchReg0).addReg(ScratchReg1);
+  }
+
+  // Emit the relevant DWARF information about the change in stack pointer as
+  // well as where to find both r4 and r5 (the callee-saved registers).
+  CFIIndex =
+      MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
+  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex);
+  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+      nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
+  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex);
+  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+      nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
+  BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex);
+
+  // mov SR1, sp
+  if (Thumb) {
+    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
+                       .addReg(ARM::SP));
+  } else if (CompareStackPointer) {
+    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
+                       .addReg(ARM::SP)).addReg(0);
+  }
+
+  // sub SR1, sp, #StackSize
+  if (!CompareStackPointer && Thumb) {
+    AddDefaultPred(
+        AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1))
+            .addReg(ScratchReg1).addImm(AlignedStackSize));
+  } else if (!CompareStackPointer) {
+    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
+                       .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0);
+  }
+
+  if (Thumb && ST->isThumb1Only()) {
+    unsigned PCLabelId = ARMFI->createPICLabelUId();
+    ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
+        MF.getFunction()->getContext(), "STACK_LIMIT", PCLabelId, 0);
+    MachineConstantPool *MCP = MF.getConstantPool();
+    unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment());
+
+    // ldr SR0, [pc, offset(STACK_LIMIT)]
+    AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
+                       .addConstantPoolIndex(CPI));
+
+    // ldr SR0, [SR0]
+    AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
+                       .addReg(ScratchReg0).addImm(0));
+  } else {
+    // Get TLS base address from the coprocessor
+    // mrc p15, #0, SR0, c13, c0, #3
+    AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
+                       .addImm(15)
+                       .addImm(0)
+                       .addImm(13)
+                       .addImm(0)
+                       .addImm(3));
+
+    // Use the last tls slot on android and a private field of the TCB on linux.
+    assert(ST->isTargetAndroid() || ST->isTargetLinux());
+    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
+
+    // Get the stack limit from the right offset
+    // ldr SR0, [sr0, #4 * TlsOffset]
+    AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
+                       .addReg(ScratchReg0).addImm(4 * TlsOffset));
+  }
+
+  // Compare stack limit with stack size requested.
+  // cmp SR0, SR1
+  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
+  AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode))
+                     .addReg(ScratchReg0)
+                     .addReg(ScratchReg1));
+
+  // This jump is taken if StackLimit < SP - stack required.
+  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
+  BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
+      .addImm(ARMCC::LO)
+      .addReg(ARM::CPSR);
+
+
+  // Calling __morestack(StackSize, Size of stack arguments).
+  // __morestack knows that the stack size requested is in SR0 (r4)
+  // and the size of the stack arguments is in SR1 (r5).
+
+  // Pass the first argument to __morestack in Scratch Register #0:
+  // the amount of stack required.
+  if (Thumb) {
+    AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8),
+                                        ScratchReg0)).addImm(AlignedStackSize));
+  } else {
+    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
+                       .addImm(AlignedStackSize)).addReg(0);
+  }
+  // Pass the second argument to __morestack in Scratch Register #1:
+  // the amount of stack consumed by the function arguments passed on the stack.
+  if (Thumb) {
+    AddDefaultPred(
+        AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1))
+            .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())));
+  } else {
+    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
+                       .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())))
+        .addReg(0);
+  }
+
+  // push {lr} - Save the return address of this function.
+  if (Thumb) {
+    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH)))
+        .addReg(ARM::LR);
+  } else {
+    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
+                       .addReg(ARM::SP, RegState::Define)
+                       .addReg(ARM::SP))
+        .addReg(ARM::LR);
+  }
+
+  // Emit the DWARF info about the change in stack as well as where to find the
+  // previous link register.
+  CFIIndex =
+      MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
+  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex);
+  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+      nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
+  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex);
+
+  // Call __morestack().
+  if (Thumb) {
+    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL)))
+        .addExternalSymbol("__morestack");
+  } else {
+    BuildMI(AllocMBB, DL, TII.get(ARM::BL))
+        .addExternalSymbol("__morestack");
+  }
+
+  // pop {lr} - Restore the return address of the original function.
+  if (Thumb) {
+    if (ST->isThumb1Only()) {
+      AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
+          .addReg(ScratchReg0);
+      AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
+                         .addReg(ScratchReg0));
+    } else {
+      AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
+                         .addReg(ARM::LR, RegState::Define)
+                         .addReg(ARM::SP, RegState::Define)
+                         .addReg(ARM::SP)
+                         .addImm(4));
+    }
+  } else {
+    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
+                       .addReg(ARM::SP, RegState::Define)
+                       .addReg(ARM::SP))
+        .addReg(ARM::LR);
+  }
+
+  // Restore SR0 and SR1 in case __morestack() was called.
+  // __morestack() will skip the PostStackMBB block so we need to restore
+  // the scratch registers from here.
+  // pop {SR0, SR1}
+  if (Thumb) {
+    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
+        .addReg(ScratchReg0)
+        .addReg(ScratchReg1);
+  } else {
+    AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
+                       .addReg(ARM::SP, RegState::Define)
+                       .addReg(ARM::SP))
+        .addReg(ScratchReg0)
+        .addReg(ScratchReg1);
+  }
+
+  // Update the CFA offset now that we've popped.
+  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
+  BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex);
+
+  // bx lr - Return from this function.
+  Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET;
+  AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode)));
+
+  // Restore SR0 and SR1 in case __morestack() was not called.
+  // pop {SR0, SR1}
+  if (Thumb) {
+    AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP)))
+        .addReg(ScratchReg0)
+        .addReg(ScratchReg1);
+  } else {
+    AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
+                       .addReg(ARM::SP, RegState::Define)
+                       .addReg(ARM::SP))
+        .addReg(ScratchReg0)
+        .addReg(ScratchReg1);
+  }
+
+  // Update the CFA offset now that we've popped.
+  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
+  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex);
+
+  // Tell debuggers that r4 and r5 are now the same as they were in the
+  // previous function, that they're the "Same Value".
+  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
+      nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
+  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex);
+  CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
+      nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
+  BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex);
+
+  // Organizing MBB lists
+  PostStackMBB->addSuccessor(&prologueMBB);
+
+  AllocMBB->addSuccessor(PostStackMBB);
+
+  GetMBB->addSuccessor(PostStackMBB);
+  GetMBB->addSuccessor(AllocMBB);
+
+  McrMBB->addSuccessor(GetMBB);
+
+  PrevStackMBB->addSuccessor(McrMBB);
+
+#ifdef XDEBUG
+  MF.verify();
+#endif
+}
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 463428fd19e..524ee36dd32 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -57,6 +57,8 @@ public:
   void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                             RegScavenger *RS) const override;
 
+  void adjustForSegmentedStacks(MachineFunction &MF) const;
+
 private:
   void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 42bc5bc7860..1778659b54f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3101,6 +3101,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
                   CCInfo.getNextStackOffset(), TotalArgRegsSaveSize);
 
+  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
+
   return Chain;
 }
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 216430b4783..d7ec6eba941 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -114,6 +114,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
   /// relocation models.
unsigned GlobalBaseReg; + /// ArgumentStackSize - amount of bytes on stack consumed by the arguments + /// being passed on the stack + unsigned ArgumentStackSize; + public: ARMFunctionInfo() : isThumb(false), @@ -182,6 +186,9 @@ public: void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; } + unsigned getArgumentStackSize() const { return ArgumentStackSize; } + void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; } + unsigned createJumpTableUId() { return JumpTableUId++; } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 3ffc72e21e0..21fa83dff73 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -356,6 +356,9 @@ public: return TargetTriple.getEnvironment() == Triple::GNUEABIHF || TargetTriple.getEnvironment() == Triple::EABIHF; } + bool isTargetAndroid() const { + return TargetTriple.getEnvironment() == Triple::Android; + } bool isAPCS_ABI() const { assert(TargetABI != ARM_ABI_UNKNOWN); diff --git a/test/CodeGen/ARM/debug-segmented-stacks.ll b/test/CodeGen/ARM/debug-segmented-stacks.ll new file mode 100644 index 00000000000..b0dc467cfb5 --- /dev/null +++ b/test/CodeGen/ARM/debug-segmented-stacks.ll @@ -0,0 +1,80 @@ +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs -filetype=asm | FileCheck %s -check-prefix=ARM-linux +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10} +!llvm.ident = !{!11} + +define void @test_basic() { + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) + ret void + +; ARM-linux: test_basic: + +; ARM-linux: push {r4, r5} +; ARM-linux: .cfi_def_cfa_offset 8 +; ARM-linux: .cfi_offset r5, -4 +; ARM-linux: .cfi_offset r4, -8 +; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-linux-NEXT: mov r5, sp +; ARM-linux-NEXT: ldr r4, [r4, #4] +; ARM-linux-NEXT: cmp r4, r5 +; ARM-linux-NEXT: blo .LBB0_2 + +; ARM-linux: mov r4, #48 +; ARM-linux-NEXT: mov r5, #0 +; ARM-linux-NEXT: stmdb sp!, {lr} +; ARM-linux: .cfi_def_cfa_offset 12 +; ARM-linux: .cfi_offset lr, -12 +; ARM-linux-NEXT: bl __morestack +; ARM-linux-NEXT: ldm sp!, {lr} +; ARM-linux-NEXT: pop {r4, r5} +; ARM-linux: .cfi_def_cfa_offset 0 +; ARM-linux-NEXT: bx lr + +; ARM-linux: pop {r4, r5} +; ARM-linux: .cfi_def_cfa_offset 0 +; ARM-linux .cfi_same_value r4 +; ARM-linux .cfi_same_value r5 +} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/var.c] [DW_LANG_C99] +!1 = metadata !{metadata !"var.c", metadata !"/tmp"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_basic", + metadata !"test_basic", metadata !"", i32 5, metadata !6, i1 false, i1 true, + i32 0, i32 0, null, i32 256, i1 false, void ()* @test_basic, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [sum] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/var.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8, metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, 
i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!11 = metadata !{metadata !"clang version 3.5 "} +!12 = metadata !{i32 786689, metadata !4, metadata !"count", metadata !5, i32 16777221, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [count] [line 5] +!13 = metadata !{i32 5, i32 0, metadata !4, null} +!14 = metadata !{i32 786688, metadata !4, metadata !"vl", metadata !5, i32 6, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vl] [line 6] +!15 = metadata !{i32 786454, metadata !16, null, metadata !"va_list", i32 30, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ] [va_list] [line 30, size 0, align 0, offset 0] [from __builtin_va_list] +!16 = metadata !{metadata !"/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", metadata !"/tmp"} +!17 = metadata !{i32 786454, metadata !1, null, metadata !"__builtin_va_list", i32 6, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [__builtin_va_list] [line 6, size 0, align 0, offset 0] [from __va_list] +!18 = metadata !{i32 786451, metadata !1, null, metadata !"__va_list", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__va_list] [line 6, size 32, align 32, offset 0] [def] [from ] +!19 = metadata !{metadata !20} +!20 = metadata !{i32 786445, metadata !1, metadata !18, metadata !"__ap", i32 6, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_member ] [__ap] [line 6, size 32, align 32, offset 0] [from ] +!21 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from ] +!22 = metadata !{i32 6, i32 0, metadata !4, null} +!23 = metadata !{i32 7, i32 0, metadata !4, null} +!24 = metadata !{i32 786688, metadata !4, metadata !"test_basic", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 8] +!25 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ] +!26 = metadata !{i32 786688, metadata !27, metadata !"i", metadata !5, i32 9, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 9] +!27 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/var.c] +!28 = metadata !{i32 9, i32 0, metadata !27, null} +!29 = metadata !{i32 10, i32 0, metadata !30, null} +!30 = metadata !{i32 786443, metadata !1, metadata !27, i32 9, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/var.c] +!31 = metadata !{i32 11, i32 0, metadata !30, null} +!32 = metadata !{i32 12, i32 0, metadata !4, null} +!33 = metadata !{i32 13, i32 0, metadata !4, null} + +; Just to prevent the alloca from being optimized away +declare void @dummy_use(i32*, i32) diff --git a/test/CodeGen/ARM/segmented-stacks-dynamic.ll b/test/CodeGen/ARM/segmented-stacks-dynamic.ll new file mode 100644 index 00000000000..13b5bcf2adb --- /dev/null +++ b/test/CodeGen/ARM/segmented-stacks-dynamic.ll @@ -0,0 +1,62 @@ +; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux +; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -filetype=obj +; RUN: llc < %s 
-mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj + +; Just to prevent the alloca from being optimized away +declare void @dummy_use(i32*, i32) + +define i32 @test_basic(i32 %l) { + %mem = alloca i32, i32 %l + call void @dummy_use (i32* %mem, i32 %l) + %terminate = icmp eq i32 %l, 0 + br i1 %terminate, label %true, label %false + +true: + ret i32 0 + +false: + %newlen = sub i32 %l, 1 + %retvalue = call i32 @test_basic(i32 %newlen) + ret i32 %retvalue + +; ARM-linux: test_basic: + +; ARM-linux: push {r4, r5} +; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-linux-NEXT: mov r5, sp +; ARM-linux-NEXT: ldr r4, [r4, #4] +; ARM-linux-NEXT: cmp r4, r5 +; ARM-linux-NEXT: blo .LBB0_2 + +; ARM-linux: mov r4, #24 +; ARM-linux-NEXT: mov r5, #0 +; ARM-linux-NEXT: stmdb sp!, {lr} +; ARM-linux-NEXT: bl __morestack +; ARM-linux-NEXT: ldm sp!, {lr} +; ARM-linux-NEXT: pop {r4, r5} +; ARM-linux-NEXT: bx lr + +; ARM-linux: pop {r4, r5} + + +; ARM-android: test_basic: + +; ARM-android: push {r4, r5} +; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-android-NEXT: mov r5, sp +; ARM-android-NEXT: ldr r4, [r4, #252] +; ARM-android-NEXT: cmp r4, r5 +; ARM-android-NEXT: blo .LBB0_2 + +; ARM-android: mov r4, #24 +; ARM-android-NEXT: mov r5, #0 +; ARM-android-NEXT: stmdb sp!, {lr} +; ARM-android-NEXT: bl __morestack +; ARM-android-NEXT: ldm sp!, {lr} +; ARM-android-NEXT: pop {r4, r5} +; ARM-android-NEXT: bx lr + +; ARM-android: pop {r4, r5} + +} diff --git a/test/CodeGen/ARM/segmented-stacks.ll b/test/CodeGen/ARM/segmented-stacks.ll new file mode 100644 index 00000000000..5eff63303bc --- /dev/null +++ b/test/CodeGen/ARM/segmented-stacks.ll @@ -0,0 +1,235 @@ +; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux + +; We used to crash with filetype=obj +; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -filetype=obj +; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj + + +; Just to prevent the alloca from being optimized away +declare void @dummy_use(i32*, i32) + +define void @test_basic() { + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) + ret void + +; ARM-linux: test_basic: + +; ARM-linux: push {r4, r5} +; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-linux-NEXT: mov r5, sp +; ARM-linux-NEXT: ldr r4, [r4, #4] +; ARM-linux-NEXT: cmp r4, r5 +; ARM-linux-NEXT: blo .LBB0_2 + +; ARM-linux: mov r4, #48 +; ARM-linux-NEXT: mov r5, #0 +; ARM-linux-NEXT: stmdb sp!, {lr} +; ARM-linux-NEXT: bl __morestack +; ARM-linux-NEXT: ldm sp!, {lr} +; ARM-linux-NEXT: pop {r4, r5} +; ARM-linux-NEXT: bx lr + +; ARM-linux: pop {r4, r5} + +; ARM-android: test_basic: + +; ARM-android: push {r4, r5} +; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-android-NEXT: mov r5, sp +; ARM-android-NEXT: ldr r4, [r4, #252] +; ARM-android-NEXT: cmp r4, r5 +; ARM-android-NEXT: blo .LBB0_2 + +; ARM-android: mov r4, #48 +; ARM-android-NEXT: mov r5, #0 +; ARM-android-NEXT: stmdb sp!, {lr} +; ARM-android-NEXT: bl __morestack +; ARM-android-NEXT: ldm sp!, {lr} +; ARM-android-NEXT: pop {r4, r5} +; ARM-android-NEXT: bx lr + +; ARM-android: pop {r4, r5} + +} + +define i32 @test_nested(i32 * nest %closure, i32 %other) { + %addend = load i32 * %closure + %result = add i32 %other, %addend + ret i32 %result + +; ARM-linux: test_nested: + +; ARM-linux: push 
{r4, r5} +; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-linux-NEXT: mov r5, sp +; ARM-linux-NEXT: ldr r4, [r4, #4] +; ARM-linux-NEXT: cmp r4, r5 +; ARM-linux-NEXT: blo .LBB1_2 + +; ARM-linux: mov r4, #0 +; ARM-linux-NEXT: mov r5, #0 +; ARM-linux-NEXT: stmdb sp!, {lr} +; ARM-linux-NEXT: bl __morestack +; ARM-linux-NEXT: ldm sp!, {lr} +; ARM-linux-NEXT: pop {r4, r5} +; ARM-linux-NEXT: bx lr + +; ARM-linux: pop {r4, r5} + +; ARM-android: test_nested: + +; ARM-android: push {r4, r5} +; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-android-NEXT: mov r5, sp +; ARM-android-NEXT: ldr r4, [r4, #252] +; ARM-android-NEXT: cmp r4, r5 +; ARM-android-NEXT: blo .LBB1_2 + +; ARM-android: mov r4, #0 +; ARM-android-NEXT: mov r5, #0 +; ARM-android-NEXT: stmdb sp!, {lr} +; ARM-android-NEXT: bl __morestack +; ARM-android-NEXT: ldm sp!, {lr} +; ARM-android-NEXT: pop {r4, r5} +; ARM-android-NEXT: bx lr + +; ARM-android: pop {r4, r5} + +} + +define void @test_large() { + %mem = alloca i32, i32 10000 + call void @dummy_use (i32* %mem, i32 0) + ret void + +; ARM-linux: test_large: + +; ARM-linux: push {r4, r5} +; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-linux-NEXT: sub r5, sp, #40192 +; ARM-linux-NEXT: ldr r4, [r4, #4] +; ARM-linux-NEXT: cmp r4, r5 +; ARM-linux-NEXT: blo .LBB2_2 + +; ARM-linux: mov r4, #40192 +; ARM-linux-NEXT: mov r5, #0 +; ARM-linux-NEXT: stmdb sp!, {lr} +; ARM-linux-NEXT: bl __morestack +; ARM-linux-NEXT: ldm sp!, {lr} +; ARM-linux-NEXT: pop {r4, r5} +; ARM-linux-NEXT: bx lr + +; ARM-linux: pop {r4, r5} + +; ARM-android: test_large: + +; ARM-android: push {r4, r5} +; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-android-NEXT: sub r5, sp, #40192 +; ARM-android-NEXT: ldr r4, [r4, #252] +; ARM-android-NEXT: cmp r4, r5 +; ARM-android-NEXT: blo .LBB2_2 + +; ARM-android: mov r4, #40192 +; ARM-android-NEXT: mov r5, #0 +; ARM-android-NEXT: stmdb sp!, {lr} +; ARM-android-NEXT: bl __morestack +; ARM-android-NEXT: ldm sp!, {lr} +; ARM-android-NEXT: pop {r4, r5} +; ARM-android-NEXT: bx lr + +; ARM-android: pop {r4, r5} + +} + +define fastcc void @test_fastcc() { + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) + ret void + +; ARM-linux: test_fastcc: + +; ARM-linux: push {r4, r5} +; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-linux-NEXT: mov r5, sp +; ARM-linux-NEXT: ldr r4, [r4, #4] +; ARM-linux-NEXT: cmp r4, r5 +; ARM-linux-NEXT: blo .LBB3_2 + +; ARM-linux: mov r4, #48 +; ARM-linux-NEXT: mov r5, #0 +; ARM-linux-NEXT: stmdb sp!, {lr} +; ARM-linux-NEXT: bl __morestack +; ARM-linux-NEXT: ldm sp!, {lr} +; ARM-linux-NEXT: pop {r4, r5} +; ARM-linux-NEXT: bx lr + +; ARM-linux: pop {r4, r5} + +; ARM-android: test_fastcc: + +; ARM-android: push {r4, r5} +; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-android-NEXT: mov r5, sp +; ARM-android-NEXT: ldr r4, [r4, #252] +; ARM-android-NEXT: cmp r4, r5 +; ARM-android-NEXT: blo .LBB3_2 + +; ARM-android: mov r4, #48 +; ARM-android-NEXT: mov r5, #0 +; ARM-android-NEXT: stmdb sp!, {lr} +; ARM-android-NEXT: bl __morestack +; ARM-android-NEXT: ldm sp!, {lr} +; ARM-android-NEXT: pop {r4, r5} +; ARM-android-NEXT: bx lr + +; ARM-android: pop {r4, r5} + +} + +define fastcc void @test_fastcc_large() { + %mem = alloca i32, i32 10000 + call void @dummy_use (i32* %mem, i32 0) + ret void + +; ARM-linux: test_fastcc_large: + +; ARM-linux: push {r4, r5} +; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-linux-NEXT: sub r5, sp, #40192 +; ARM-linux-NEXT: ldr r4, [r4, #4] +; ARM-linux-NEXT: cmp r4, r5 +; 
ARM-linux-NEXT: blo .LBB4_2 + +; ARM-linux: mov r4, #40192 +; ARM-linux-NEXT: mov r5, #0 +; ARM-linux-NEXT: stmdb sp!, {lr} +; ARM-linux-NEXT: bl __morestack +; ARM-linux-NEXT: ldm sp!, {lr} +; ARM-linux-NEXT: pop {r4, r5} +; ARM-linux-NEXT: bx lr + +; ARM-linux: pop {r4, r5} + +; ARM-android: test_fastcc_large: + +; ARM-android: push {r4, r5} +; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-android-NEXT: sub r5, sp, #40192 +; ARM-android-NEXT: ldr r4, [r4, #252] +; ARM-android-NEXT: cmp r4, r5 +; ARM-android-NEXT: blo .LBB4_2 + +; ARM-android: mov r4, #40192 +; ARM-android-NEXT: mov r5, #0 +; ARM-android-NEXT: stmdb sp!, {lr} +; ARM-android-NEXT: bl __morestack +; ARM-android-NEXT: ldm sp!, {lr} +; ARM-android-NEXT: pop {r4, r5} +; ARM-android-NEXT: bx lr + +; ARM-android: pop {r4, r5} + +} diff --git a/test/CodeGen/Thumb/segmented-stacks-dynamic.ll b/test/CodeGen/Thumb/segmented-stacks-dynamic.ll new file mode 100644 index 00000000000..067c07b689e --- /dev/null +++ b/test/CodeGen/Thumb/segmented-stacks-dynamic.ll @@ -0,0 +1,63 @@ +; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-linux +; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android +; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -filetype=obj +; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -filetype=obj + +; Just to prevent the alloca from being optimized away +declare void @dummy_use(i32*, i32) + +define i32 @test_basic(i32 %l) { + %mem = alloca i32, i32 %l + call void @dummy_use (i32* %mem, i32 %l) + %terminate = icmp eq i32 %l, 0 + br i1 %terminate, label %true, label %false + +true: + ret i32 0 + +false: + %newlen = sub i32 %l, 1 + %retvalue = call i32 @test_basic(i32 %newlen) + ret i32 %retvalue + +; Thumb-linux: test_basic: + +; Thumb-linux: push {r4, r5} +; Thumb-linux: mov r5, sp +; Thumb-linux-NEXT: ldr r4, .LCPI0_0 +; Thumb-linux-NEXT: ldr r4, [r4] +; Thumb-linux-NEXT: cmp r4, r5 +; Thumb-linux-NEXT: blo .LBB0_2 + +; Thumb-linux: mov r4, #16 +; Thumb-linux-NEXT: mov r5, #0 +; Thumb-linux-NEXT: push {lr} +; Thumb-linux-NEXT: bl __morestack +; Thumb-linux-NEXT: pop {r4} +; Thumb-linux-NEXT: mov lr, r4 +; Thumb-linux-NEXT: pop {r4, r5} +; Thumb-linux-NEXT: bx lr + +; Thumb-linux: pop {r4, r5} + +; Thumb-android: test_basic: + +; Thumb-android: push {r4, r5} +; Thumb-android: mov r5, sp +; Thumb-android-NEXT: ldr r4, .LCPI0_0 +; Thumb-android-NEXT: ldr r4, [r4] +; Thumb-android-NEXT: cmp r4, r5 +; Thumb-android-NEXT: blo .LBB0_2 + +; Thumb-android: mov r4, #16 +; Thumb-android-NEXT: mov r5, #0 +; Thumb-android-NEXT: push {lr} +; Thumb-android-NEXT: bl __morestack +; Thumb-android-NEXT: pop {r4} +; Thumb-android-NEXT: mov lr, r4 +; Thumb-android-NEXT: pop {r4, r5} +; Thumb-android-NEXT: bx lr + +; Thumb-android: pop {r4, r5} + +} diff --git a/test/CodeGen/Thumb/segmented-stacks.ll b/test/CodeGen/Thumb/segmented-stacks.ll new file mode 100644 index 00000000000..5649b0088df --- /dev/null +++ b/test/CodeGen/Thumb/segmented-stacks.ll @@ -0,0 +1,247 @@ +; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android +; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-linux +; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -filetype=obj +; RUN: llc < %s 
-mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -filetype=obj + + +; Just to prevent the alloca from being optimized away +declare void @dummy_use(i32*, i32) + +define void @test_basic() { + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) + ret void + +; Thumb-android: test_basic: + +; Thumb-android: push {r4, r5} +; Thumb-android-NEXT: mov r5, sp +; Thumb-android-NEXT: ldr r4, .LCPI0_0 +; Thumb-android-NEXT: ldr r4, [r4] +; Thumb-android-NEXT: cmp r4, r5 +; Thumb-android-NEXT: blo .LBB0_2 + +; Thumb-android: mov r4, #48 +; Thumb-android-NEXT: mov r5, #0 +; Thumb-android-NEXT: push {lr} +; Thumb-android-NEXT: bl __morestack +; Thumb-android-NEXT: pop {r4} +; Thumb-android-NEXT: mov lr, r4 +; Thumb-android-NEXT: pop {r4, r5} +; Thumb-android-NEXT: bx lr + +; Thumb-android: pop {r4, r5} + +; Thumb-linux: test_basic: + +; Thumb-linux: push {r4, r5} +; Thumb-linux-NEXT: mov r5, sp +; Thumb-linux-NEXT: ldr r4, .LCPI0_0 +; Thumb-linux-NEXT: ldr r4, [r4] +; Thumb-linux-NEXT: cmp r4, r5 +; Thumb-linux-NEXT: blo .LBB0_2 + +; Thumb-linux: mov r4, #48 +; Thumb-linux-NEXT: mov r5, #0 +; Thumb-linux-NEXT: push {lr} +; Thumb-linux-NEXT: bl __morestack +; Thumb-linux-NEXT: pop {r4} +; Thumb-linux-NEXT: mov lr, r4 +; Thumb-linux-NEXT: pop {r4, r5} +; Thumb-linux-NEXT: bx lr + +; Thumb-linux: pop {r4, r5} + +} + +define i32 @test_nested(i32 * nest %closure, i32 %other) { + %addend = load i32 * %closure + %result = add i32 %other, %addend + ret i32 %result + +; Thumb-android: test_nested: + +; Thumb-android: push {r4, r5} +; Thumb-android-NEXT: mov r5, sp +; Thumb-android-NEXT: ldr r4, .LCPI1_0 +; Thumb-android-NEXT: ldr r4, [r4] +; Thumb-android-NEXT: cmp r4, r5 +; Thumb-android-NEXT: blo .LBB1_2 + +; Thumb-android: mov r4, #0 +; Thumb-android-NEXT: mov r5, #0 +; Thumb-android-NEXT: push {lr} +; Thumb-android-NEXT: bl __morestack +; Thumb-android-NEXT: pop {r4} +; Thumb-android-NEXT: mov lr, r4 +; Thumb-android-NEXT: pop {r4, r5} +; Thumb-android-NEXT: bx lr + +; Thumb-android: pop {r4, r5} + +; Thumb-linux: test_nested: + +; Thumb-linux: push {r4, r5} +; Thumb-linux-NEXT: mov r5, sp +; Thumb-linux-NEXT: ldr r4, .LCPI1_0 +; Thumb-linux-NEXT: ldr r4, [r4] +; Thumb-linux-NEXT: cmp r4, r5 +; Thumb-linux-NEXT: blo .LBB1_2 + +; Thumb-linux: mov r4, #0 +; Thumb-linux-NEXT: mov r5, #0 +; Thumb-linux-NEXT: push {lr} +; Thumb-linux-NEXT: bl __morestack +; Thumb-linux-NEXT: pop {r4} +; Thumb-linux-NEXT: mov lr, r4 +; Thumb-linux-NEXT: pop {r4, r5} +; Thumb-linux-NEXT: bx lr + +; Thumb-linux: pop {r4, r5} + +} + +define void @test_large() { + %mem = alloca i32, i32 10000 + call void @dummy_use (i32* %mem, i32 0) + ret void + +; Thumb-android: test_large: + +; Thumb-android: push {r4, r5} +; Thumb-android-NEXT: mov r5, sp +; Thumb-android-NEXT: sub r5, #40192 +; Thumb-android-NEXT: ldr r4, .LCPI2_2 +; Thumb-android-NEXT: ldr r4, [r4] +; Thumb-android-NEXT: cmp r4, r5 +; Thumb-android-NEXT: blo .LBB2_2 + +; Thumb-android: mov r4, #40192 +; Thumb-android-NEXT: mov r5, #0 +; Thumb-android-NEXT: push {lr} +; Thumb-android-NEXT: bl __morestack +; Thumb-android-NEXT: pop {r4} +; Thumb-android-NEXT: mov lr, r4 +; Thumb-android-NEXT: pop {r4, r5} +; Thumb-android-NEXT: bx lr + +; Thumb-android: pop {r4, r5} + +; Thumb-linux: test_large: + +; Thumb-linux: push {r4, r5} +; Thumb-linux-NEXT: mov r5, sp +; Thumb-linux-NEXT: sub r5, #40192 +; Thumb-linux-NEXT: ldr r4, .LCPI2_2 +; Thumb-linux-NEXT: ldr r4, [r4] +; Thumb-linux-NEXT: cmp r4, r5 +; Thumb-linux-NEXT: blo .LBB2_2 + +; Thumb-linux: mov r4, 
#40192 +; Thumb-linux-NEXT: mov r5, #0 +; Thumb-linux-NEXT: push {lr} +; Thumb-linux-NEXT: bl __morestack +; Thumb-linux-NEXT: pop {r4} +; Thumb-linux-NEXT: mov lr, r4 +; Thumb-linux-NEXT: pop {r4, r5} +; Thumb-linux-NEXT: bx lr + +; Thumb-linux: pop {r4, r5} + +} + +define fastcc void @test_fastcc() { + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) + ret void + +; Thumb-android: test_fastcc: + +; Thumb-android: push {r4, r5} +; Thumb-android-NEXT: mov r5, sp +; Thumb-android-NEXT: ldr r4, .LCPI3_0 +; Thumb-android-NEXT: ldr r4, [r4] +; Thumb-android-NEXT: cmp r4, r5 +; Thumb-android-NEXT: blo .LBB3_2 + +; Thumb-android: mov r4, #48 +; Thumb-android-NEXT: mov r5, #0 +; Thumb-android-NEXT: push {lr} +; Thumb-android-NEXT: bl __morestack +; Thumb-android-NEXT: pop {r4} +; Thumb-android-NEXT: mov lr, r4 +; Thumb-android-NEXT: pop {r4, r5} +; Thumb-android-NEXT: bx lr + +; Thumb-android: pop {r4, r5} + +; Thumb-linux: test_fastcc: + +; Thumb-linux: push {r4, r5} +; Thumb-linux-NEXT: mov r5, sp +; Thumb-linux-NEXT: ldr r4, .LCPI3_0 +; Thumb-linux-NEXT: ldr r4, [r4] +; Thumb-linux-NEXT: cmp r4, r5 +; Thumb-linux-NEXT: blo .LBB3_2 + +; Thumb-linux: mov r4, #48 +; Thumb-linux-NEXT: mov r5, #0 +; Thumb-linux-NEXT: push {lr} +; Thumb-linux-NEXT: bl __morestack +; Thumb-linux-NEXT: pop {r4} +; Thumb-linux-NEXT: mov lr, r4 +; Thumb-linux-NEXT: pop {r4, r5} +; Thumb-linux-NEXT: bx lr + +; Thumb-linux: pop {r4, r5} + +} + +define fastcc void @test_fastcc_large() { + %mem = alloca i32, i32 10000 + call void @dummy_use (i32* %mem, i32 0) + ret void + +; Thumb-android: test_fastcc_large: + +; Thumb-android: push {r4, r5} +; Thumb-android-NEXT: mov r5, sp +; Thumb-android-NEXT: sub r5, #40192 +; Thumb-android-NEXT: ldr r4, .LCPI4_2 +; Thumb-android-NEXT: ldr r4, [r4] +; Thumb-android-NEXT: cmp r4, r5 +; Thumb-android-NEXT: blo .LBB4_2 + +; Thumb-android: mov r4, #40192 +; Thumb-android-NEXT: mov r5, #0 +; Thumb-android-NEXT: push {lr} +; Thumb-android-NEXT: bl __morestack +; Thumb-android-NEXT: pop {r4} +; Thumb-android-NEXT: mov lr, r4 +; Thumb-android-NEXT: pop {r4, r5} +; Thumb-android-NEXT: bx lr + +; Thumb-android: pop {r4, r5} + +; Thumb-linux: test_fastcc_large: + +; Thumb-linux: push {r4, r5} +; Thumb-linux-NEXT: mov r5, sp +; Thumb-linux-NEXT: sub r5, #40192 +; Thumb-linux-NEXT: ldr r4, .LCPI4_2 +; Thumb-linux-NEXT: ldr r4, [r4] +; Thumb-linux-NEXT: cmp r4, r5 +; Thumb-linux-NEXT: blo .LBB4_2 + +; Thumb-linux: mov r4, #40192 +; Thumb-linux-NEXT: mov r5, #0 +; Thumb-linux-NEXT: push {lr} +; Thumb-linux-NEXT: bl __morestack +; Thumb-linux-NEXT: pop {r4} +; Thumb-linux-NEXT: mov lr, r4 +; Thumb-linux-NEXT: pop {r4, r5} +; Thumb-linux-NEXT: bx lr + +; Thumb-linux: pop {r4, r5} + +} diff --git a/test/CodeGen/Thumb2/segmented-stacks.ll b/test/CodeGen/Thumb2/segmented-stacks.ll new file mode 100644 index 00000000000..602fc84e254 --- /dev/null +++ b/test/CodeGen/Thumb2/segmented-stacks.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -mtriple=thumb-linux-androideabi -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android +; RUN: llc < %s -mtriple=thumb-linux-androideabi -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 -segmented-stacks -filetype=obj + + +; Just to prevent the alloca from being optimized away +declare void @dummy_use(i32*, i32) + +define void @test_basic() { + %mem = alloca i32, i32 10 + call void @dummy_use (i32* %mem, i32 10) + ret void + +; Thumb-android: test_basic: + +; Thumb-android: push {r4, r5} +; 
Thumb-android-NEXT: mrc p15, #0, r4, c13, c0, #3 +; Thumb-android-NEXT: mov r5, sp +; Thumb-android-NEXT: ldr r4, [r4, #252] +; Thumb-android-NEXT: cmp r4, r5 +; Thumb-android-NEXT: blo .LBB0_2 + +; Thumb-android: mov r4, #48 +; Thumb-android-NEXT: mov r5, #0 +; Thumb-android-NEXT: push {lr} +; Thumb-android-NEXT: bl __morestack +; Thumb-android-NEXT: ldr lr, [sp], #4 +; Thumb-android-NEXT: pop {r4, r5} +; Thumb-android-NEXT: bx lr + +; Thumb-android: pop {r4, r5} + +}
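
For reference, here is a small standalone sketch (not part of the patch itself) that exercises the alignToARMConstant rounding helper introduced above. The input values are taken from the tests: a 48-byte frame is already encodable as an ARM modified immediate, and a frame of roughly 40000 bytes (test_large's 10000 x i32 alloca) rounds up to the #40192 the checks expect; the exact MachineFrameInfo stack size may include extra padding, but any value in that neighborhood rounds to the same constant.

#include <cassert>
#include <cstdint>

// Standalone copy of the alignToARMConstant helper from this patch: round
// Value up to the next constant encodable as an ARM modified immediate
// (an 8-bit value rotated right by an even amount within a 32-bit word).
static uint32_t alignToARMConstant(uint32_t Value) {
  unsigned Shifted = 0;
  if (Value == 0)
    return 0;
  // Shift left by even amounts until the top two bits are occupied.
  while (!(Value & 0xC0000000)) {
    Value = Value << 2;
    Shifted += 2;
  }
  // Keep the top 8 bits; round up if any lower bit was set.
  bool Carry = (Value & 0x00FFFFFF);
  Value = ((Value & 0xFF000000) >> 24) + Carry;
  if (Value & 0x00000100)
    Value = Value & 0x000001FC;
  // Undo the normalizing shift.
  if (Shifted > 24)
    Value = Value >> (Shifted - 24);
  else
    Value = Value << (24 - Shifted);
  return Value;
}

int main() {
  assert(alignToARMConstant(48) == 48);       // small frames are already encodable
  assert(alignToARMConstant(40000) == 40192); // matches the #40192 in test_large
  return 0;
}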