Re-apply r110655 with fixes. Epilogue must restore sp from fp if the function stack frame has a var-sized object.

Also added a test case to check for the added benefit of this patch: it's optimizing away the unnecessary restore of sp from fp for some non-leaf functions.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110707 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2010-08-10 19:30:19 +00:00
parent 98197e55c1
commit ac096808a3
7 changed files with 148 additions and 70 deletions

View File

@ -177,7 +177,7 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(ARM::SP);
Reserved.set(ARM::PC);
Reserved.set(ARM::FPSCR);
if (STI.isTargetDarwin() || hasFP(MF))
if (hasFP(MF))
Reserved.set(FramePtr);
// Some targets reserve R9.
if (STI.isR9Reserved())
@ -194,7 +194,7 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
return true;
case ARM::R7:
case ARM::R11:
if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF)))
if (FramePtr == Reg && hasFP(MF))
return true;
break;
case ARM::R9:
@ -511,7 +511,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
if (!STI.isTargetDarwin() && !hasFP(MF)) {
if (!hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPREven1,
GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
@ -540,7 +540,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
if (!STI.isTargetDarwin() && !hasFP(MF)) {
if (!hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPROdd1,
GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
@ -610,6 +610,10 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
/// or if frame pointer elimination is disabled.
///
bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
// Mac OS X requires FP not to be clobbered for backtracing purpose.
if (STI.isTargetDarwin())
return true;
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Always eliminate non-leaf frame pointers.
return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
@ -683,6 +687,7 @@ static unsigned estimateStackSize(MachineFunction &MF) {
/// instructions will require a scratch register during their expansion later.
unsigned
ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned Limit = (1 << 12) - 1;
for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
@ -708,7 +713,10 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
Limit = std::min(Limit, ((1U << 8) - 1) * 4);
break;
case ARMII::AddrModeT2_i12:
if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1);
// i12 supports only positive offset so these will be converted to
// i8 opcodes. See llvm::rewriteT2FrameIndex.
if (hasFP(MF) && AFI->hasStackFrame())
Limit = std::min(Limit, (1U << 8) - 1);
break;
case ARMII::AddrMode6:
// Addressing mode 6 (load/store) instructions can't encode an
@ -860,7 +868,8 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// and which instructions will need a scratch register for them. Is it
// worth the effort and added fragility?
bool BigStack =
(RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >=
(RS &&
(estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
estimateRSStackSizeLimit(MF)))
|| MFI->hasVarSizedObjects()
|| (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
@ -881,9 +890,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
ExtraCSSpill = true;
}
// Darwin ABI requires FP to point to the stack slot that contains the
// previous FP.
if (STI.isTargetDarwin() || hasFP(MF)) {
if (hasFP(MF)) {
MF.getRegInfo().setPhysRegUsed(FramePtr);
NumGPRSpills++;
}
@ -976,7 +983,7 @@ unsigned ARMBaseRegisterInfo::getRARegister() const {
unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
if (STI.isTargetDarwin() || hasFP(MF))
if (hasFP(MF))
return FramePtr;
return ARM::SP;
}
@ -1546,7 +1553,8 @@ emitPrologue(MachineFunction &MF) const {
// Otherwise, if this is not Darwin, all the callee-saved registers go
// into spill area 1, including the FP in R11. In either case, it is
// now safe to emit this assignment.
if (STI.isTargetDarwin() || hasFP(MF)) {
bool HasFP = hasFP(MF);
if (HasFP) {
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
@ -1565,7 +1573,7 @@ emitPrologue(MachineFunction &MF) const {
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
if (STI.isTargetDarwin() || hasFP(MF))
if (HasFP)
AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
NumBytes);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
@ -1577,11 +1585,14 @@ emitPrologue(MachineFunction &MF) const {
if (NumBytes) {
// Adjust SP after all the callee-save spills.
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
if (HasFP)
AFI->setShouldRestoreSPFromFP(true);
}
if (STI.isTargetELF() && hasFP(MF)) {
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
AFI->setShouldRestoreSPFromFP(true);
}
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
@ -1614,7 +1625,14 @@ emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
.addReg(ARM::R4, RegState::Kill);
}
AFI->setShouldRestoreSPFromFP(true);
}
// If the frame has variable sized objects then the epilogue must restore
// the sp from fp.
if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects())
AFI->setShouldRestoreSPFromFP(true);
}
static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
@ -1669,17 +1687,10 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize());
// Darwin ABI requires FP to point to the stack slot that contains the
// previous FP.
bool HasFP = hasFP(MF);
if ((STI.isTargetDarwin() && NumBytes) || HasFP) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
// Reset SP based on frame pointer only if the stack frame extends beyond
// frame pointer stack slot or target is ELF and the function has FP.
if (HasFP ||
AFI->getGPRCalleeSavedArea2Size() ||
AFI->getDPRCalleeSavedAreaSize() ||
AFI->getDPRCalleeSavedAreaOffset()) {
if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
if (NumBytes) {
if (isARM)
emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
@ -1691,13 +1702,11 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
// Thumb2 or ARM.
if (isARM)
BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
.addReg(FramePtr)
.addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
.addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
else
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
.addReg(FramePtr);
}
}
} else if (NumBytes)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);

View File

@ -742,14 +742,15 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned
ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;
switch (RC->getID()) {
default:
return 0;
case ARM::tGPRRegClassID:
return 5 - FPDiff;
case ARM::GPRRegClassID:
return 10 - FPDiff - (Subtarget->isR9Reserved() ? 1 : 0);
return RegInfo->hasFP(MF) ? 4 : 5;
case ARM::GPRRegClassID: {
unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
}
case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
case ARM::DPRRegClassID:
return 32 - 10;

View File

@ -43,6 +43,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// processFunctionBeforeCalleeSavedScan().
bool HasStackFrame;
/// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
/// emitPrologue.
bool RestoreSPFromFP;
/// LRSpilledForFarJump - True if the LR register has been for spilled to
/// enable far jump.
bool LRSpilledForFarJump;
@ -95,7 +99,7 @@ public:
ARMFunctionInfo() :
isThumb(false),
hasThumb2(false),
VarArgsRegSaveSize(0), HasStackFrame(false),
VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@ -106,7 +110,7 @@ public:
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
VarArgsRegSaveSize(0), HasStackFrame(false),
VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@ -125,6 +129,9 @@ public:
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }

View File

@ -294,8 +294,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
if (Subtarget.isThumb1Only()) {
I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
// Mac OS X requires FP not to be clobbered for backtracing purpose.
return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
return RI->hasFP(MF) ? I-1 : I;
}
if (Subtarget.isTargetDarwin()) {
@ -312,8 +311,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
}
// Mac OS X requires FP not to be clobbered for backtracing purpose.
return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
return RI->hasFP(MF) ? I-1 : I;
}
}];
}
@ -403,8 +401,7 @@ def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
if (Subtarget.isThumb1Only()) {
I = THUMB_rGPRAO + (sizeof(THUMB_rGPRAO)/sizeof(unsigned));
// Mac OS X requires FP not to be clobbered for backtracing purpose.
return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
return RI->hasFP(MF) ? I-1 : I;
}
if (Subtarget.isTargetDarwin()) {
@ -421,8 +418,7 @@ def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
I = ARM_rGPRAO_1 + (sizeof(ARM_rGPRAO_1)/sizeof(unsigned));
}
// Mac OS X requires FP not to be clobbered for backtracing purpose.
return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
return RI->hasFP(MF) ? I-1 : I;
}
}];
}
@ -449,11 +445,9 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
tGPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
tGPRClass::iterator I =
THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned));
// Mac OS X requires FP not to be clobbered for backtracing purpose.
return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
return RI->hasFP(MF) ? I-1 : I;
}
}];
}

View File

@ -742,11 +742,11 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
dl = MBBI->getDebugLoc();
}
// Darwin ABI requires FP to point to the stack slot that contains the
// previous FP.
if (STI.isTargetDarwin() || hasFP(MF)) {
// Adjust FP so it point to the stack slot that contains the previous FP.
if (hasFP(MF)) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0);
AFI->setShouldRestoreSPFromFP(true);
}
// Determine starting offsets of spill areas.
@ -764,10 +764,9 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
}
if (STI.isTargetELF() && hasFP(MF)) {
if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
}
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
@ -828,7 +827,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize());
if (hasFP(MF)) {
if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
// Reset SP based on frame pointer only if the stack frame extends beyond
// frame pointer stack slot or target is ELF and the function has FP.

View File

@ -1,16 +1,31 @@
; RUN: llc < %s -march=thumb | grep {ldr.*LCP} | count 5
; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
define void @test1() {
; CHECK: test1:
; CHECK: sub sp, #256
; CHECK: add sp, #256
%tmp = alloca [ 64 x i32 ] , align 4
ret void
}
define void @test2() {
; CHECK: test2:
; CHECK: ldr r0, LCPI
; CHECK: add sp, r0
; CHECK: mov sp, r7
; CHECK: sub sp, #4
%tmp = alloca [ 4168 x i8 ] , align 4
ret void
}
define i32 @test3() {
; CHECK: test3:
; CHECK: ldr r1, LCPI
; CHECK: add sp, r1
; CHECK: ldr r1, LCPI
; CHECK: add r1, sp
; CHECK: mov sp, r7
; CHECK: sub sp, #4
%retval = alloca i32, align 4
%tmp = alloca i32, align 4
%a = alloca [805306369 x i8], align 16

View File

@ -0,0 +1,53 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -O3 | FileCheck %s
@.str = private constant [4 x i8] c"%d\0A\00", align 4 ; <[4 x i8]*> [#uses=1]
define internal fastcc i32 @Callee(i32 %i) nounwind {
entry:
; CHECK: Callee:
%0 = icmp eq i32 %i, 0 ; <i1> [#uses=1]
br i1 %0, label %bb2, label %bb
bb: ; preds = %entry
%1 = alloca [1000 x i8], align 4 ; <[1000 x i8]*> [#uses=1]
%.sub = getelementptr inbounds [1000 x i8]* %1, i32 0, i32 0 ; <i8*> [#uses=2]
%2 = call i32 (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* %.sub, i32 0, i32 1000, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %i) nounwind ; <i32> [#uses=0]
%3 = load i8* %.sub, align 4 ; <i8> [#uses=1]
%4 = sext i8 %3 to i32 ; <i32> [#uses=1]
ret i32 %4
bb2: ; preds = %entry
; Must restore sp from fp here
; CHECK: mov sp, r7
; CHECK: sub sp, #8
; CHECK: pop
ret i32 0
}
declare i32 @__sprintf_chk(i8*, i32, i32, i8*, ...) nounwind
define i32 @main() nounwind {
; CHECK: main:
bb.nph:
br label %bb
bb: ; preds = %bb, %bb.nph
%0 = phi i32 [ 0, %bb.nph ], [ %3, %bb ] ; <i32> [#uses=2]
%j.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb ] ; <i32> [#uses=1]
%1 = tail call fastcc i32 @Callee(i32 %0) nounwind ; <i32> [#uses=1]
%2 = add nsw i32 %1, %j.01 ; <i32> [#uses=2]
%3 = add nsw i32 %0, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %3, 10000 ; <i1> [#uses=1]
br i1 %exitcond, label %bb2, label %bb
bb2: ; preds = %bb
; No need to restore sp from fp here.
; CHECK: printf
; CHECK-NOT: mov sp, r7
; CHECK-NOT: sub sp, #12
; CHECK: pop
%4 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
ret i32 0
}
declare i32 @printf(i8* nocapture, ...) nounwind