From 645c5b94e270db6648919efc140152bbac85bcd9 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 30 May 2014 13:23:06 +0000 Subject: [PATCH] ARM: use AAPCS-style prologues for embedded MachO. Darwin prologues save their GPRs in two stages: a narrow push of r0-r7 & lr, followed by a wide push of the remaining registers if there are any. AAPCS uses a single push.w instruction. It turns out that, on average, enough registers get pushed that code is smaller in the AAPCS prologue, which is a nice property for M-class programmers. They also have other options available for back-traces, so can hopefully deal with the fact that FP & LR aren't adjacent in memory. rdar://problem/15909583 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209895 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 9 ++++++--- lib/Target/ARM/ARMFrameLowering.cpp | 14 +++++++------- test/CodeGen/ARM/fold-stack-adjust.ll | 16 ++++++++-------- test/CodeGen/ARM/interrupt-attr.ll | 8 ++++---- test/CodeGen/ARM/none-macho.ll | 10 ++++------ 5 files changed, 29 insertions(+), 28 deletions(-) diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index a2eee9ff304..cdd91c7a703 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -45,9 +45,12 @@ using namespace llvm; ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) { - if (STI.isTargetMachO()) - FramePtr = ARM::R7; - else if (STI.isTargetWindows()) + if (STI.isTargetMachO()) { + if (STI.isTargetDarwin() || STI.isThumb1Only()) + FramePtr = ARM::R7; + else + FramePtr = ARM::R11; + } else if (STI.isTargetWindows()) FramePtr = ARM::R11; else // ARM EABI FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 0caf4bfd77a..6888ae994c5 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -220,7 +220,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.isTargetMachO()) { + if (STI.isTargetDarwin()) { GPRCS2Size += 4; break; } @@ -380,7 +380,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.isTargetMachO()) + if (STI.isTargetDarwin()) break; // fallthrough case ARM::R0: @@ -445,7 +445,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.isTargetMachO()) { + if (STI.isTargetDarwin()) { unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); unsigned Offset = MFI->getObjectOffset(FI); unsigned CFIIndex = MMI.addFrameInst( @@ -810,7 +810,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned LastReg = 0; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetMachO())) continue; + if (!(Func)(Reg, STI.isTargetDarwin())) continue; // D-registers in the aligned area DPRCS2 are NOT spilled here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -888,7 +888,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, bool DeleteRet = false; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetMachO())) continue; + if (!(Func)(Reg, STI.isTargetDarwin())) continue; // The aligned reloads from area DPRCS2 are not inserted here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -1438,7 +1438,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (Spilled) { NumGPRSpills++; - if (!STI.isTargetMachO()) { + if (!STI.isTargetDarwin()) { if (Reg == ARM::LR) LRSpilled = true; CS1Spilled = true; @@ -1460,7 +1460,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, break; } } else { - if (!STI.isTargetMachO()) { + if (!STI.isTargetDarwin()) { UnspilledCS1GPRs.push_back(Reg); continue; } diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll index 695a20b5976..eb0120f7c1b 100644 --- a/test/CodeGen/ARM/fold-stack-adjust.ll +++ b/test/CodeGen/ARM/fold-stack-adjust.ll @@ -12,11 +12,11 @@ declare void @bar(i8*) define void @check_simple() minsize { ; CHECK-LABEL: check_simple: -; CHECK: push {r3, r4, r5, r6, r7, lr} +; CHECK: push.w {r7, r8, r9, r10, r11, lr} ; CHECK-NOT: sub sp, sp, ; ... ; CHECK-NOT: add sp, sp, -; CHECK: pop {r0, r1, r2, r3, r7, pc} +; CHECK: pop.w {r0, r1, r2, r3, r11, pc} ; CHECK-T1-LABEL: check_simple: ; CHECK-T1: push {r3, r4, r5, r6, r7, lr} @@ -44,11 +44,11 @@ define void @check_simple() minsize { define void @check_simple_too_big() minsize { ; CHECK-LABEL: check_simple_too_big: -; CHECK: push {r7, lr} +; CHECK: push.w {r11, lr} ; CHECK: sub sp, ; ... ; CHECK: add sp, -; CHECK: pop {r7, pc} +; CHECK: pop.w {r11, pc} %var = alloca i8, i32 64 call void @bar(i8* %var) ret void @@ -93,11 +93,11 @@ define void @check_vfp_fold() minsize { ; folded in except that doing so would clobber the value being returned. define i64 @check_no_return_clobber() minsize { ; CHECK-LABEL: check_no_return_clobber: -; CHECK: push {r1, r2, r3, r4, r5, r6, r7, lr} +; CHECK: push.w {r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NOT: sub sp, ; ... ; CHECK: add sp, #24 -; CHECK: pop {r7, pc} +; CHECK: pop.w {r11, pc} ; Just to keep iOS FileCheck within previous function: ; CHECK-IOS-LABEL: check_no_return_clobber: @@ -176,9 +176,9 @@ define void @test_varsize(...) minsize { ; CHECK-LABEL: test_varsize: ; CHECK: sub sp, #16 -; CHECK: push {r5, r6, r7, lr} +; CHECK: push.w {r9, r10, r11, lr} ; ... -; CHECK: pop.w {r2, r3, r7, lr} +; CHECK: pop.w {r2, r3, r11, lr} ; CHECK: add sp, #16 ; CHECK: bx lr diff --git a/test/CodeGen/ARM/interrupt-attr.ll b/test/CodeGen/ARM/interrupt-attr.ll index c5be6674da1..cb67dd929f4 100644 --- a/test/CodeGen/ARM/interrupt-attr.ll +++ b/test/CodeGen/ARM/interrupt-attr.ll @@ -35,15 +35,15 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" { ; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to ; appropriate sentinel so no special return needed). ; CHECK-M-LABEL: irq_fn: -; CHECK-M: push {r4, r6, r7, lr} -; CHECK-M: add r7, sp, #8 +; CHECK-M: push.w {r4, r10, r11, lr} +; CHECK-M: add.w r11, sp, #8 ; CHECK-M: mov r4, sp ; CHECK-M: bic r4, r4, #7 ; CHECK-M: mov sp, r4 ; CHECK-M: blx _bar -; CHECK-M: sub.w r4, r7, #8 +; CHECK-M: sub.w r4, r11, #8 ; CHECK-M: mov sp, r4 -; CHECK-M: pop {r4, r6, r7, pc} +; CHECK-M: pop.w {r4, r10, r11, pc} call arm_aapcscc void @bar() ret void diff --git a/test/CodeGen/ARM/none-macho.ll b/test/CodeGen/ARM/none-macho.ll index 2795b8cd2d1..60c21716dc3 100644 --- a/test/CodeGen/ARM/none-macho.ll +++ b/test/CodeGen/ARM/none-macho.ll @@ -48,8 +48,8 @@ define i32 @test_frame_ptr() { ; CHECK-LABEL: test_frame_ptr: call void @test_trap() - ; Frame pointer is r7 as for Darwin -; CHECK: mov r7, sp + ; Frame pointer is r11. +; CHECK: mov r11, sp ret i32 42 } @@ -63,11 +63,9 @@ define void @test_two_areas(%big_arr* %addr) { ; This goes with the choice of r7 as FP (largely). FP and LR have to be stored ; consecutively on the stack for the frame record to be valid, which means we ; need the 2 register-save areas employed by iOS. -; CHECK-NON-FAST: push {r4, r5, r6, r7, lr} -; CHECK-NON-FAST: push.w {r8, r9, r10, r11} +; CHECK-NON-FAST: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; ... -; CHECK-NON-FAST: pop.w {r8, r9, r10, r11} -; CHECK-NON-FAST: pop {r4, r5, r6, r7, pc} +; CHECK-NON-FAST: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ret void }