Fix epilogue codegen to avoid leaving the stack pointer in an invalid

state. Previously Thumb2 would restore sp from fp like this:
mov sp, r7
sub, sp, #4
If an interrupt is taken after the 'mov' but before the 'sub', callee-saved
registers might be clobbered by the interrupt handler. Instead, try
restoring directly from sp:
add sp, #4
Or, if necessary (with VLA, etc.) use a scratch register to compute sp and
then restore it:
sub.w r4, r7, #8
mov sp, r7
rdar://8465407


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119977 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2010-11-22 18:12:04 +00:00
parent 4d5a299790
commit ab5c703fdb
9 changed files with 124 additions and 37 deletions

View File

@ -694,10 +694,11 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MachineFrameInfo *MFI = MF.getFrameInfo();
// Spill R4 if Thumb2 function requires stack realignment - it will be used as
// scratch register.
// scratch register. Also spill R4 if Thumb2 function has varsized objects,
// since it's always posible to restore sp from fp in a single instruction.
// FIXME: It will be better just to find spare register here.
if (needsStackRealignment(MF) &&
AFI->isThumb2Function())
if (AFI->isThumb2Function() &&
(MFI->hasVarSizedObjects() || needsStackRealignment(MF)))
MF.getRegInfo().setPhysRegUsed(ARM::R4);
// Spill LR if Thumb1 function uses variable length argument lists.

View File

@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@ -212,15 +213,21 @@ void ARMFrameInfo::emitPrologue(MachineFunction &MF) const {
if (NumBytes) {
// Adjust SP after all the callee-save spills.
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
if (HasFP)
if (HasFP && isARM)
// Restore from fp only in ARM mode: e.g. sub sp, r7, #24
// Note it's not safe to do this in Thumb2 mode because it would have
// taken two instructions:
// mov sp, r7
// sub sp, #24
// If an interrupt is taken between the two instructions, then sp is in
// an inconsistent state (pointing to the middle of callee-saved area).
// The interrupt handler can end up clobbering the registers.
AFI->setShouldRestoreSPFromFP(true);
}
if (STI.isTargetELF() && hasFP(MF)) {
if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
AFI->setShouldRestoreSPFromFP(true);
}
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
@ -275,7 +282,7 @@ void ARMFrameInfo::emitPrologue(MachineFunction &MF) const {
// If the frame has variable sized objects then the epilogue must restore
// the sp from fp.
if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects())
if (MFI->hasVarSizedObjects())
AFI->setShouldRestoreSPFromFP(true);
}
@ -326,9 +333,21 @@ void ARMFrameInfo::emitEpilogue(MachineFunction &MF,
if (isARM)
emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
ARMCC::AL, 0, TII);
else
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
else {
// It's not possible to restore SP from FP in a single instruction.
// For Darwin, this looks like:
// mov sp, r7
// sub sp, #24
// This is bad, if an interrupt is taken after the mov, sp is in an
// inconsistent state.
// Use the first callee-saved register as a scratch register.
assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
"No scratch register to restore SP from FP!");
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
ARMCC::AL, 0, TII);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
.addReg(ARM::R4);
}
} else {
// Thumb2 or ARM.
if (isARM)

View File

@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
@ -117,13 +118,6 @@ void Thumb1FrameInfo::emitPrologue(MachineFunction &MF) const {
dl = MBBI->getDebugLoc();
}
// Adjust FP so it point to the stack slot that contains the previous FP.
if (hasFP(MF)) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0);
AFI->setShouldRestoreSPFromFP(true);
}
// Determine starting offsets of spill areas.
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
@ -132,12 +126,21 @@ void Thumb1FrameInfo::emitPrologue(MachineFunction &MF) const {
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
NumBytes = DPRCSOffset;
if (NumBytes) {
// Adjust FP so it point to the stack slot that contains the previous FP.
if (hasFP(MF)) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0);
if (NumBytes > 7)
// If offset is > 7 then sp cannot be adjusted in a single instruction,
// try restoring from fp instead.
AFI->setShouldRestoreSPFromFP(true);
}
if (NumBytes)
// Insert it after all the callee-save spills.
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
}
if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
@ -219,10 +222,14 @@ void Thumb1FrameInfo::emitEpilogue(MachineFunction &MF,
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
// Reset SP based on frame pointer only if the stack frame extends beyond
// frame pointer stack slot or target is ELF and the function has FP.
if (NumBytes)
emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes,
if (NumBytes) {
assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
"No scratch register to restore SP from FP!");
emitThumbRegPlusImmediate(MBB, MBBI, ARM::R4, FramePtr, -NumBytes,
TII, *RegInfo, dl);
else
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
.addReg(ARM::R4);
} else
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
.addReg(FramePtr);
} else {

View File

@ -1,7 +1,7 @@
; RUN: llc < %s -march=arm
; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | count 1
; RUN: llc < %s -mtriple=arm-linux-gnu --disable-fp-elim | \
; RUN: grep mov | count 3
; RUN: grep mov | count 2
; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | count 2
@str = internal constant [12 x i8] c"Hello World\00"

View File

@ -1,12 +1,15 @@
; RUN: llc < %s -march=thumb | not grep {ldr sp}
; RUN: llc < %s -mtriple=thumb-apple-darwin | \
; RUN: not grep {sub.*r7}
; RUN: llc < %s -march=thumb | grep {mov.*r6, sp}
; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
define void @t1(%struct.state* %v) {
; CHECK: t1:
; CHECK: push
; CHECK: add r7, sp, #12
; CHECK: mov r2, sp
; CHECK: subs r4, r2, r1
; CHECK: mov sp, r4
%tmp6 = load i32* null
%tmp8 = alloca float, i32 %tmp6
store i32 1, i32* null
@ -34,6 +37,18 @@ declare fastcc void @f2(float*, float*, float*, i32)
@str215 = external global [2 x i8]
define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
; CHECK: t2:
; CHECK: push
; CHECK: add r7, sp, #12
; CHECK: sub sp, #8
; CHECK: mov r6, sp
; CHECK: str r2, [r6, #4]
; CHECK: str r0, [r6]
; CHECK-NOT: ldr r0, [sp
; CHECK: ldr r0, [r6, #4]
; CHECK: mov r0, sp
; CHECK: subs r5, r0, r1
; CHECK: mov sp, r5
%tmp1 = call i32 @strlen( i8* %tag )
%tmp3 = call i32 @strlen( i8* %contents )
%tmp4 = add i32 %tmp1, 2

View File

@ -12,8 +12,8 @@ define void @test2() {
; CHECK: test2:
; CHECK: ldr r0, LCPI
; CHECK: add sp, r0
; CHECK: mov sp, r7
; CHECK: sub sp, #4
; CHECK: subs r4, r7, #4
; CHECK: mov sp, r4
%tmp = alloca [ 4168 x i8 ] , align 4
ret void
}
@ -24,8 +24,8 @@ define i32 @test3() {
; CHECK: add sp, r2
; CHECK: ldr r1, LCPI
; CHECK: add r1, sp
; CHECK: mov sp, r7
; CHECK: sub sp, #4
; CHECK: subs r4, r7, #4
; CHECK: mov sp, r4
%retval = alloca i32, align 4
%tmp = alloca i32, align 4
%a = alloca [805306369 x i8], align 16

View File

@ -5,8 +5,13 @@
define hidden i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind {
entry:
; CHECK: __gcov_execlp:
; CHECK: mov sp, r7
; CHECK: sub sp, #4
; CHECK: sub sp, #8
; CHECK: push
; CHECK: add r7, sp, #4
; CHECK: subs r4, r7, #4
; CHECK: mov sp, r4
; CHECK-NOT: mov sp, r7
; CHECK: add sp, #8
call void @__gcov_flush() nounwind
br i1 undef, label %bb5, label %bb

View File

@ -5,6 +5,10 @@
define internal fastcc i32 @Callee(i32 %i) nounwind {
entry:
; CHECK: Callee:
; CHECK: push
; CHECK: mov r4, sp
; CHECK: sub.w r12, r4, #1000
; CHECK: mov sp, r12
%0 = icmp eq i32 %i, 0 ; <i1> [#uses=1]
br i1 %0, label %bb2, label %bb
@ -17,9 +21,11 @@ bb: ; preds = %entry
ret i32 %4
bb2: ; preds = %entry
; Must restore sp from fp here
; CHECK: mov sp, r7
; CHECK: sub sp, #8
; Must restore sp from fp here. Make sure not to leave sp in a temporarily invalid
; state though. rdar://8465407
; CHECK-NOT: mov sp, r7
; CHECK: sub.w r4, r7, #8
; CHECK: mov sp, r4
; CHECK: pop
ret i32 0
}

View File

@ -0,0 +1,34 @@
; rdar://8465407
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
%struct.buf = type opaque
declare void @bar() nounwind optsize
define void @foo() nounwind optsize {
; CHECK: foo:
; CHECK: push
; CHECK: add r7, sp, #4
; CHECK: sub sp, #4
entry:
%m.i = alloca %struct.buf*, align 4
br label %bb
bb:
br i1 undef, label %bb3, label %bb2
bb2:
call void @bar() nounwind optsize
br i1 undef, label %bb, label %bb3
bb3:
br i1 undef, label %return, label %bb
return:
; CHECK: %return
; 'mov sp, r7' would have left sp in an invalid state
; CHECK-NOT: mov sp, r7
; CHECK-NOT: sub, sp, #4
; CHECK: add sp, #4
ret void
}