ARM: use AAPCS-style prologues for embedded MachO.

Darwin prologues save their GPRs in two stages: a narrow push of r0-r7 & lr,
followed by a wide push of the remaining registers if there are any. AAPCS uses
a single push.w instruction.

It turns out that, on average, enough registers get pushed that code is smaller
in the AAPCS prologue, which is a nice property for M-class programmers. They
also have other options available for back-traces, so can hopefully deal with
the fact that FP & LR aren't adjacent in memory.

rdar://problem/15909583

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209895 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover 2014-05-30 13:23:06 +00:00
parent 98f8bc9323
commit 645c5b94e2
5 changed files with 29 additions and 28 deletions

View File

@ -45,9 +45,12 @@ using namespace llvm;
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
: ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) {
if (STI.isTargetMachO())
FramePtr = ARM::R7;
else if (STI.isTargetWindows())
if (STI.isTargetMachO()) {
if (STI.isTargetDarwin() || STI.isThumb1Only())
FramePtr = ARM::R7;
else
FramePtr = ARM::R11;
} else if (STI.isTargetWindows())
FramePtr = ARM::R11;
else // ARM EABI
FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11;

View File

@ -220,7 +220,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R10:
case ARM::R11:
case ARM::R12:
if (STI.isTargetMachO()) {
if (STI.isTargetDarwin()) {
GPRCS2Size += 4;
break;
}
@ -380,7 +380,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R10:
case ARM::R11:
case ARM::R12:
if (STI.isTargetMachO())
if (STI.isTargetDarwin())
break;
// fallthrough
case ARM::R0:
@ -445,7 +445,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R10:
case ARM::R11:
case ARM::R12:
if (STI.isTargetMachO()) {
if (STI.isTargetDarwin()) {
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
unsigned Offset = MFI->getObjectOffset(FI);
unsigned CFIIndex = MMI.addFrameInst(
@ -810,7 +810,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned LastReg = 0;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (!(Func)(Reg, STI.isTargetMachO())) continue;
if (!(Func)(Reg, STI.isTargetDarwin())) continue;
// D-registers in the aligned area DPRCS2 are NOT spilled here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@ -888,7 +888,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool DeleteRet = false;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (!(Func)(Reg, STI.isTargetMachO())) continue;
if (!(Func)(Reg, STI.isTargetDarwin())) continue;
// The aligned reloads from area DPRCS2 are not inserted here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@ -1438,7 +1438,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (Spilled) {
NumGPRSpills++;
if (!STI.isTargetMachO()) {
if (!STI.isTargetDarwin()) {
if (Reg == ARM::LR)
LRSpilled = true;
CS1Spilled = true;
@ -1460,7 +1460,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
break;
}
} else {
if (!STI.isTargetMachO()) {
if (!STI.isTargetDarwin()) {
UnspilledCS1GPRs.push_back(Reg);
continue;
}

View File

@ -12,11 +12,11 @@ declare void @bar(i8*)
define void @check_simple() minsize {
; CHECK-LABEL: check_simple:
; CHECK: push {r3, r4, r5, r6, r7, lr}
; CHECK: push.w {r7, r8, r9, r10, r11, lr}
; CHECK-NOT: sub sp, sp,
; ...
; CHECK-NOT: add sp, sp,
; CHECK: pop {r0, r1, r2, r3, r7, pc}
; CHECK: pop.w {r0, r1, r2, r3, r11, pc}
; CHECK-T1-LABEL: check_simple:
; CHECK-T1: push {r3, r4, r5, r6, r7, lr}
@ -44,11 +44,11 @@ define void @check_simple() minsize {
define void @check_simple_too_big() minsize {
; CHECK-LABEL: check_simple_too_big:
; CHECK: push {r7, lr}
; CHECK: push.w {r11, lr}
; CHECK: sub sp,
; ...
; CHECK: add sp,
; CHECK: pop {r7, pc}
; CHECK: pop.w {r11, pc}
%var = alloca i8, i32 64
call void @bar(i8* %var)
ret void
@ -93,11 +93,11 @@ define void @check_vfp_fold() minsize {
; folded in except that doing so would clobber the value being returned.
define i64 @check_no_return_clobber() minsize {
; CHECK-LABEL: check_no_return_clobber:
; CHECK: push {r1, r2, r3, r4, r5, r6, r7, lr}
; CHECK: push.w {r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NOT: sub sp,
; ...
; CHECK: add sp, #24
; CHECK: pop {r7, pc}
; CHECK: pop.w {r11, pc}
; Just to keep iOS FileCheck within previous function:
; CHECK-IOS-LABEL: check_no_return_clobber:
@ -176,9 +176,9 @@ define void @test_varsize(...) minsize {
; CHECK-LABEL: test_varsize:
; CHECK: sub sp, #16
; CHECK: push {r5, r6, r7, lr}
; CHECK: push.w {r9, r10, r11, lr}
; ...
; CHECK: pop.w {r2, r3, r7, lr}
; CHECK: pop.w {r2, r3, r11, lr}
; CHECK: add sp, #16
; CHECK: bx lr

View File

@ -35,15 +35,15 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to
; appropriate sentinel so no special return needed).
; CHECK-M-LABEL: irq_fn:
; CHECK-M: push {r4, r6, r7, lr}
; CHECK-M: add r7, sp, #8
; CHECK-M: push.w {r4, r10, r11, lr}
; CHECK-M: add.w r11, sp, #8
; CHECK-M: mov r4, sp
; CHECK-M: bic r4, r4, #7
; CHECK-M: mov sp, r4
; CHECK-M: blx _bar
; CHECK-M: sub.w r4, r7, #8
; CHECK-M: sub.w r4, r11, #8
; CHECK-M: mov sp, r4
; CHECK-M: pop {r4, r6, r7, pc}
; CHECK-M: pop.w {r4, r10, r11, pc}
call arm_aapcscc void @bar()
ret void

View File

@ -48,8 +48,8 @@ define i32 @test_frame_ptr() {
; CHECK-LABEL: test_frame_ptr:
call void @test_trap()
; Frame pointer is r7 as for Darwin
; CHECK: mov r7, sp
; Frame pointer is r11.
; CHECK: mov r11, sp
ret i32 42
}
@ -63,11 +63,9 @@ define void @test_two_areas(%big_arr* %addr) {
; This goes with the choice of r7 as FP (largely). FP and LR have to be stored
; consecutively on the stack for the frame record to be valid, which means we
; need the 2 register-save areas employed by iOS.
; CHECK-NON-FAST: push {r4, r5, r6, r7, lr}
; CHECK-NON-FAST: push.w {r8, r9, r10, r11}
; CHECK-NON-FAST: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; ...
; CHECK-NON-FAST: pop.w {r8, r9, r10, r11}
; CHECK-NON-FAST: pop {r4, r5, r6, r7, pc}
; CHECK-NON-FAST: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
ret void
}