mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-30 04:35:00 +00:00
ARM: use AAPCS-style prologues for embedded MachO.
Darwin prologues save their GPRs in two stages: a narrow push of r0-r7 & lr, followed by a wide push of the remaining registers if there are any. AAPCS uses a single push.w instruction.

It turns out that, on average, enough registers get pushed that code is smaller in the AAPCS prologue, which is a nice property for M-class programmers. They also have other options available for back-traces, so they can hopefully deal with the fact that FP & LR aren't adjacent in memory.

rdar://problem/15909583

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209895 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
98f8bc9323
commit
645c5b94e2
@ -45,9 +45,12 @@ using namespace llvm;
|
||||
|
||||
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
|
||||
: ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) {
|
||||
if (STI.isTargetMachO())
|
||||
FramePtr = ARM::R7;
|
||||
else if (STI.isTargetWindows())
|
||||
if (STI.isTargetMachO()) {
|
||||
if (STI.isTargetDarwin() || STI.isThumb1Only())
|
||||
FramePtr = ARM::R7;
|
||||
else
|
||||
FramePtr = ARM::R11;
|
||||
} else if (STI.isTargetWindows())
|
||||
FramePtr = ARM::R11;
|
||||
else // ARM EABI
|
||||
FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11;
|
||||
|
@ -220,7 +220,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
|
||||
case ARM::R10:
|
||||
case ARM::R11:
|
||||
case ARM::R12:
|
||||
if (STI.isTargetMachO()) {
|
||||
if (STI.isTargetDarwin()) {
|
||||
GPRCS2Size += 4;
|
||||
break;
|
||||
}
|
||||
@ -380,7 +380,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
|
||||
case ARM::R10:
|
||||
case ARM::R11:
|
||||
case ARM::R12:
|
||||
if (STI.isTargetMachO())
|
||||
if (STI.isTargetDarwin())
|
||||
break;
|
||||
// fallthrough
|
||||
case ARM::R0:
|
||||
@ -445,7 +445,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
|
||||
case ARM::R10:
|
||||
case ARM::R11:
|
||||
case ARM::R12:
|
||||
if (STI.isTargetMachO()) {
|
||||
if (STI.isTargetDarwin()) {
|
||||
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
|
||||
unsigned Offset = MFI->getObjectOffset(FI);
|
||||
unsigned CFIIndex = MMI.addFrameInst(
|
||||
@ -810,7 +810,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
|
||||
unsigned LastReg = 0;
|
||||
for (; i != 0; --i) {
|
||||
unsigned Reg = CSI[i-1].getReg();
|
||||
if (!(Func)(Reg, STI.isTargetMachO())) continue;
|
||||
if (!(Func)(Reg, STI.isTargetDarwin())) continue;
|
||||
|
||||
// D-registers in the aligned area DPRCS2 are NOT spilled here.
|
||||
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
|
||||
@ -888,7 +888,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
|
||||
bool DeleteRet = false;
|
||||
for (; i != 0; --i) {
|
||||
unsigned Reg = CSI[i-1].getReg();
|
||||
if (!(Func)(Reg, STI.isTargetMachO())) continue;
|
||||
if (!(Func)(Reg, STI.isTargetDarwin())) continue;
|
||||
|
||||
// The aligned reloads from area DPRCS2 are not inserted here.
|
||||
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
|
||||
@ -1438,7 +1438,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
|
||||
if (Spilled) {
|
||||
NumGPRSpills++;
|
||||
|
||||
if (!STI.isTargetMachO()) {
|
||||
if (!STI.isTargetDarwin()) {
|
||||
if (Reg == ARM::LR)
|
||||
LRSpilled = true;
|
||||
CS1Spilled = true;
|
||||
@ -1460,7 +1460,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (!STI.isTargetMachO()) {
|
||||
if (!STI.isTargetDarwin()) {
|
||||
UnspilledCS1GPRs.push_back(Reg);
|
||||
continue;
|
||||
}
|
||||
|
@ -12,11 +12,11 @@ declare void @bar(i8*)
|
||||
|
||||
define void @check_simple() minsize {
|
||||
; CHECK-LABEL: check_simple:
|
||||
; CHECK: push {r3, r4, r5, r6, r7, lr}
|
||||
; CHECK: push.w {r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NOT: sub sp, sp,
|
||||
; ...
|
||||
; CHECK-NOT: add sp, sp,
|
||||
; CHECK: pop {r0, r1, r2, r3, r7, pc}
|
||||
; CHECK: pop.w {r0, r1, r2, r3, r11, pc}
|
||||
|
||||
; CHECK-T1-LABEL: check_simple:
|
||||
; CHECK-T1: push {r3, r4, r5, r6, r7, lr}
|
||||
@ -44,11 +44,11 @@ define void @check_simple() minsize {
|
||||
|
||||
define void @check_simple_too_big() minsize {
|
||||
; CHECK-LABEL: check_simple_too_big:
|
||||
; CHECK: push {r7, lr}
|
||||
; CHECK: push.w {r11, lr}
|
||||
; CHECK: sub sp,
|
||||
; ...
|
||||
; CHECK: add sp,
|
||||
; CHECK: pop {r7, pc}
|
||||
; CHECK: pop.w {r11, pc}
|
||||
%var = alloca i8, i32 64
|
||||
call void @bar(i8* %var)
|
||||
ret void
|
||||
@ -93,11 +93,11 @@ define void @check_vfp_fold() minsize {
|
||||
; folded in except that doing so would clobber the value being returned.
|
||||
define i64 @check_no_return_clobber() minsize {
|
||||
; CHECK-LABEL: check_no_return_clobber:
|
||||
; CHECK: push {r1, r2, r3, r4, r5, r6, r7, lr}
|
||||
; CHECK: push.w {r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; CHECK-NOT: sub sp,
|
||||
; ...
|
||||
; CHECK: add sp, #24
|
||||
; CHECK: pop {r7, pc}
|
||||
; CHECK: pop.w {r11, pc}
|
||||
|
||||
; Just to keep iOS FileCheck within previous function:
|
||||
; CHECK-IOS-LABEL: check_no_return_clobber:
|
||||
@ -176,9 +176,9 @@ define void @test_varsize(...) minsize {
|
||||
|
||||
; CHECK-LABEL: test_varsize:
|
||||
; CHECK: sub sp, #16
|
||||
; CHECK: push {r5, r6, r7, lr}
|
||||
; CHECK: push.w {r9, r10, r11, lr}
|
||||
; ...
|
||||
; CHECK: pop.w {r2, r3, r7, lr}
|
||||
; CHECK: pop.w {r2, r3, r11, lr}
|
||||
; CHECK: add sp, #16
|
||||
; CHECK: bx lr
|
||||
|
||||
|
@ -35,15 +35,15 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
|
||||
; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to
|
||||
; appropriate sentinel so no special return needed).
|
||||
; CHECK-M-LABEL: irq_fn:
|
||||
; CHECK-M: push {r4, r6, r7, lr}
|
||||
; CHECK-M: add r7, sp, #8
|
||||
; CHECK-M: push.w {r4, r10, r11, lr}
|
||||
; CHECK-M: add.w r11, sp, #8
|
||||
; CHECK-M: mov r4, sp
|
||||
; CHECK-M: bic r4, r4, #7
|
||||
; CHECK-M: mov sp, r4
|
||||
; CHECK-M: blx _bar
|
||||
; CHECK-M: sub.w r4, r7, #8
|
||||
; CHECK-M: sub.w r4, r11, #8
|
||||
; CHECK-M: mov sp, r4
|
||||
; CHECK-M: pop {r4, r6, r7, pc}
|
||||
; CHECK-M: pop.w {r4, r10, r11, pc}
|
||||
|
||||
call arm_aapcscc void @bar()
|
||||
ret void
|
||||
|
@ -48,8 +48,8 @@ define i32 @test_frame_ptr() {
|
||||
; CHECK-LABEL: test_frame_ptr:
|
||||
call void @test_trap()
|
||||
|
||||
; Frame pointer is r7 as for Darwin
|
||||
; CHECK: mov r7, sp
|
||||
; Frame pointer is r11.
|
||||
; CHECK: mov r11, sp
|
||||
ret i32 42
|
||||
}
|
||||
|
||||
@ -63,11 +63,9 @@ define void @test_two_areas(%big_arr* %addr) {
|
||||
; This goes with the choice of r7 as FP (largely). FP and LR have to be stored
|
||||
; consecutively on the stack for the frame record to be valid, which means we
|
||||
; need the 2 register-save areas employed by iOS.
|
||||
; CHECK-NON-FAST: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NON-FAST: push.w {r8, r9, r10, r11}
|
||||
; CHECK-NON-FAST: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
; ...
|
||||
; CHECK-NON-FAST: pop.w {r8, r9, r10, r11}
|
||||
; CHECK-NON-FAST: pop {r4, r5, r6, r7, pc}
|
||||
; CHECK-NON-FAST: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
ret void
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user