From 5c3885ce8e6a3dc69913b50fe6bdc0c89c5432d5 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 1 May 2007 00:52:08 +0000 Subject: [PATCH] Under normal circumstances, when a frame pointer is not required, we reserve argument space for call sites in the function immediately on entry to the current function. This eliminates the need for add/sub sp brackets around call sites. However, this is not always a good idea. If the "call frame" is large and the target load / store instructions have small immediate field to encode sp offset, this can cause poor codegen. In the worst case, this can make it impossible to scavenge a register if the reserved spill slot is pushed too far apart from sp / fp. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@36607 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PrologEpilogInserter.cpp | 31 +++++++++++++--------------- lib/Target/ARM/ARMRegisterInfo.cpp | 30 ++++++++++++++++++++++++--- lib/Target/ARM/ARMRegisterInfo.h | 2 ++ 3 files changed, 43 insertions(+), 20 deletions(-) diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 478f3d3a5f2..0a7a138b16a 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -135,8 +135,9 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { unsigned MaxCallFrameSize = 0; bool HasCalls = false; + std::vector<MachineBasicBlock::iterator> FrameSDOps; for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) - for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo" @@ -144,14 +145,16 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { unsigned Size = I->getOperand(0).getImmedValue(); if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; HasCalls = true; - 
RegInfo->eliminateCallFramePseudoInstr(Fn, *BB, I++); - } else { - ++I; + FrameSDOps.push_back(I); } MachineFrameInfo *FFI = Fn.getFrameInfo(); FFI->setHasCalls(HasCalls); FFI->setMaxCallFrameSize(MaxCallFrameSize); + for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) { + MachineBasicBlock::iterator I = FrameSDOps[i]; + RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); + } // Now figure out which *callee saved* registers are modified by the current // function, thus needing to be saved and restored in the prolog/epilog. @@ -333,10 +336,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // First assign frame offsets to stack objects that are used to spill // callee saved registers. if (StackGrowsDown) { - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (i < MinCSFrameIndex || i > MaxCSFrameIndex) - continue; - + for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { // If stack grows down, we need to add size of find the lowest // address of the object. Offset += FFI->getObjectSize(i); @@ -351,10 +351,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { FFI->setObjectOffset(i, -Offset); // Set the computed offset } } else { - for (int i = FFI->getObjectIndexEnd()-1; i >= 0; --i) { - if ((unsigned)i < MinCSFrameIndex || (unsigned)i > MaxCSFrameIndex) - continue; - + for (unsigned i = MaxCSFrameIndex; i >= MinCSFrameIndex; --i) { unsigned Align = FFI->getObjectAlignment(i); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. @@ -373,7 +370,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { if (RS && RegInfo->hasFP(Fn)) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) { - // If stack grows down, we need to add size of find the lowest + // If stack grows down, we need to add size of the lowest // address of the object. 
if (StackGrowsDown) Offset += FFI->getObjectSize(SFI); @@ -447,10 +444,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // subroutines have their stack frames suitable aligned. if (!RegInfo->targetHandlesStackFrameRounding() && (FFI->hasCalls() || FFI->hasVarSizedObjects())) { - // When we have no frame pointer, we reserve argument space for call sites - // in the function immediately on entry to the current function. This - // eliminates the need for add/sub sp brackets around call sites. - if (!RegInfo->hasFP(Fn)) + // If we have reserved argument space for call sites in the function + // immediately on entry to the current function, count it as part of the + // overall stack size. + if (RegInfo->hasReservedCallFrame(Fn)) Offset += FFI->getMaxCallFrameSize(); unsigned AlignMask = TFI.getStackAlignment() - 1; diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index 2fdbe681497..bfb8266458a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -386,6 +386,29 @@ bool ARMRegisterInfo::hasFP(const MachineFunction &MF) const { return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects(); } +// hasReservedCallFrame - Under normal circumstances, when a frame pointer is +// not required, we reserve argument space for call sites in the function +// immediately on entry to the current function. This eliminates the need for +// add/sub sp brackets around call sites. Returns true if the call frame is +// included as part of the stack frame. +bool ARMRegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + unsigned CFSize = FFI->getMaxCallFrameSize(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + // It's not always a good idea to include the call frame as part of the + // stack frame. ARM (especially Thumb) has small immediate offset to + // address the stack frame. 
So a large call frame can cause poor codegen + // and may even make it impossible to scavenge a register. + if (AFI->isThumbFunction()) { + if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 + return false; + } else { + if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 + return false; + } + return !hasFP(MF); +} + /// emitARMRegPlusImmediate - Emits a series of instructions to materialize /// a destreg = basereg + immediate in ARM code. static @@ -605,7 +628,7 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, void ARMRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (hasFP(MF)) { + if (!hasReservedCallFrame(MF)) { // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount // ADJCALLSTACKUP -> add, sp, sp, amount @@ -1146,8 +1169,9 @@ ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, Limit = (1 << 8) - 1; goto DoneEstimating; } else if (AddrMode == ARMII::AddrMode5) { - Limit = ((1 << 8) - 1) * 4; - goto DoneEstimating; + unsigned ThisLimit = ((1 << 8) - 1) * 4; + if (ThisLimit < Limit) + Limit = ThisLimit; } } } diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index 450f2f0d971..95619ecb210 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -78,6 +78,8 @@ public: bool hasFP(const MachineFunction &MF) const; + bool hasReservedCallFrame(MachineFunction &MF) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;