mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 04:30:23 +00:00
ARM/Dwarf: correctly align stack before callee-saved VPRs
We were making an attempt to do this by adding an extra callee-saved GPR (so that there was an even number in the list), but when that failed we went ahead and pushed anyway. This had several potential issues: (1) the .cfi directives we emit misplaced dN, because they were based on PrologEpilogInserter's calculation; (2) unaligned stores can be less efficient; (3) unaligned stores can actually fault (likely only an issue in niche cases, but possible). This adds a final explicit stack adjustment if all other options fail, so that the actual locations of the registers match up with where they should be. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221320 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3839fd16a1
commit
1f771b80c0
@ -260,10 +260,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
|
|||||||
|
|
||||||
// Determine starting offsets of spill areas.
|
// Determine starting offsets of spill areas.
|
||||||
bool HasFP = hasFP(MF);
|
bool HasFP = hasFP(MF);
|
||||||
unsigned DPRCSOffset = NumBytes - (ArgRegsSaveSize + GPRCS1Size
|
unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
|
||||||
+ GPRCS2Size + DPRCSSize);
|
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
|
||||||
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
|
unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
|
||||||
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
|
unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
|
||||||
|
unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
|
||||||
int FramePtrOffsetInPush = 0;
|
int FramePtrOffsetInPush = 0;
|
||||||
if (HasFP) {
|
if (HasFP) {
|
||||||
FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI)
|
FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI)
|
||||||
@ -279,6 +280,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
|
|||||||
if (GPRCS2Size > 0)
|
if (GPRCS2Size > 0)
|
||||||
GPRCS2Push = LastPush = MBBI++;
|
GPRCS2Push = LastPush = MBBI++;
|
||||||
|
|
||||||
|
// Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
|
||||||
|
// .cfi_offset operations will reflect that.
|
||||||
|
if (DPRGapSize) {
|
||||||
|
assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
|
||||||
|
if (!tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize))
|
||||||
|
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
|
||||||
|
MachineInstr::FrameSetup);
|
||||||
|
}
|
||||||
|
|
||||||
// Move past area 3.
|
// Move past area 3.
|
||||||
if (DPRCSSize > 0) {
|
if (DPRCSSize > 0) {
|
||||||
DPRCSPush = MBBI;
|
DPRCSPush = MBBI;
|
||||||
@ -508,6 +518,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
|
|||||||
|
|
||||||
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
|
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
|
||||||
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
|
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
|
||||||
|
AFI->setDPRCalleeSavedGapSize(DPRGapSize);
|
||||||
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
|
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
|
||||||
|
|
||||||
// If we need dynamic stack realignment, do it here. Be paranoid and make
|
// If we need dynamic stack realignment, do it here. Be paranoid and make
|
||||||
@ -613,6 +624,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||||||
NumBytes -= (ArgRegsSaveSize +
|
NumBytes -= (ArgRegsSaveSize +
|
||||||
AFI->getGPRCalleeSavedArea1Size() +
|
AFI->getGPRCalleeSavedArea1Size() +
|
||||||
AFI->getGPRCalleeSavedArea2Size() +
|
AFI->getGPRCalleeSavedArea2Size() +
|
||||||
|
AFI->getDPRCalleeSavedGapSize() +
|
||||||
AFI->getDPRCalleeSavedAreaSize());
|
AFI->getDPRCalleeSavedAreaSize());
|
||||||
|
|
||||||
// Reset SP based on frame pointer only if the stack frame extends beyond
|
// Reset SP based on frame pointer only if the stack frame extends beyond
|
||||||
@ -661,6 +673,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||||||
while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
|
while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
|
||||||
MBBI++;
|
MBBI++;
|
||||||
}
|
}
|
||||||
|
if (AFI->getDPRCalleeSavedGapSize()) {
|
||||||
|
assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
|
||||||
|
"unexpected DPR alignment gap");
|
||||||
|
emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
|
||||||
|
}
|
||||||
|
|
||||||
if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
|
if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
|
||||||
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
|
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
|
||||||
}
|
}
|
||||||
|
@ -86,6 +86,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
|
|||||||
/// areas.
|
/// areas.
|
||||||
unsigned GPRCS1Size;
|
unsigned GPRCS1Size;
|
||||||
unsigned GPRCS2Size;
|
unsigned GPRCS2Size;
|
||||||
|
unsigned DPRCSAlignGapSize;
|
||||||
unsigned DPRCSSize;
|
unsigned DPRCSSize;
|
||||||
|
|
||||||
/// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
|
/// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
|
||||||
@ -134,7 +135,7 @@ public:
|
|||||||
RestoreSPFromFP(false),
|
RestoreSPFromFP(false),
|
||||||
LRSpilledForFarJump(false),
|
LRSpilledForFarJump(false),
|
||||||
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
|
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
|
||||||
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
|
GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
|
||||||
NumAlignedDPRCS2Regs(0),
|
NumAlignedDPRCS2Regs(0),
|
||||||
JumpTableUId(0), PICLabelUId(0),
|
JumpTableUId(0), PICLabelUId(0),
|
||||||
VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
|
VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
|
||||||
@ -183,10 +184,12 @@ public:
|
|||||||
|
|
||||||
unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
|
unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
|
||||||
unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
|
unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
|
||||||
|
unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; }
|
||||||
unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
|
unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
|
||||||
|
|
||||||
void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
|
void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
|
||||||
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
|
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
|
||||||
|
void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; }
|
||||||
void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
|
void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
|
||||||
|
|
||||||
unsigned getArgumentStackSize() const { return ArgumentStackSize; }
|
unsigned getArgumentStackSize() const { return ArgumentStackSize; }
|
||||||
|
68
test/CodeGen/ARM/dwarf-unwind.ll
Normal file
68
test/CodeGen/ARM/dwarf-unwind.ll
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
; RUN: llc -mtriple=thumbv7-netbsd-eabi -o - %s | FileCheck %s
|
||||||
|
declare void @bar()
|
||||||
|
|
||||||
|
; ARM's frame lowering attempts to tack another callee-saved register onto the
|
||||||
|
; list when it detects a potential misaligned VFP store. However, if there are
|
||||||
|
; none available it used to just vpush anyway and misreport the location of the
|
||||||
|
; registers in unwind info. Since there are benefits to aligned stores, it's
|
||||||
|
; better to correct the code than the .cfi_offset directive.
|
||||||
|
|
||||||
|
define void @test_dpr_align(i8 %l, i8 %r) {
|
||||||
|
; CHECK-LABEL: test_dpr_align:
|
||||||
|
; CHECK: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||||
|
; CHECK: sub sp, #4
|
||||||
|
; CHECK: vpush {d8}
|
||||||
|
; CHECK: .cfi_offset d8, -48
|
||||||
|
; CHECK-NOT: sub sp
|
||||||
|
; [...]
|
||||||
|
; CHECK: bl bar
|
||||||
|
; CHECK-NOT: add sp
|
||||||
|
; CHECK: vpop {d8}
|
||||||
|
; CHECK: add sp, #4
|
||||||
|
; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||||
|
call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{d8}"()
|
||||||
|
call void @bar()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; The prologue (but not the epilogue) can be made more space efficient by
|
||||||
|
; chucking an argument register into the list. Not worth it in general though,
|
||||||
|
; "sub sp, #4" is likely faster.
|
||||||
|
define void @test_dpr_align_tiny(i8 %l, i8 %r) minsize {
|
||||||
|
; CHECK-LABEL: test_dpr_align_tiny:
|
||||||
|
; CHECK: push.w {r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||||
|
; CHECK-NOT: sub sp
|
||||||
|
; CHECK: vpush {d8}
|
||||||
|
; CHECK: .cfi_offset d8, -48
|
||||||
|
; CHECK-NOT: sub sp
|
||||||
|
; [...]
|
||||||
|
; CHECK: bl bar
|
||||||
|
; CHECK-NOT: add sp
|
||||||
|
; CHECK: vpop {d8}
|
||||||
|
; CHECK: add sp, #4
|
||||||
|
; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||||
|
call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{d8}"()
|
||||||
|
call void @bar()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; However, we shouldn't do a 2-step align/adjust if there are no DPRs to be
|
||||||
|
; saved.
|
||||||
|
define void @test_nodpr_noalign(i8 %l, i8 %r) {
|
||||||
|
; CHECK-LABEL: test_nodpr_noalign:
|
||||||
|
; CHECK: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||||
|
; CHECK-NOT: sub sp
|
||||||
|
; CHECK: sub sp, #12
|
||||||
|
; CHECK-NOT: sub sp
|
||||||
|
; [...]
|
||||||
|
; CHECK: bl bar
|
||||||
|
; CHECK-NOT: add sp
|
||||||
|
; CHECK: add sp, #12
|
||||||
|
; CHECK-NOT: add sp
|
||||||
|
; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||||
|
alloca i64
|
||||||
|
call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"()
|
||||||
|
call void @bar()
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user