diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index 374d6e565d4..7c98b36e9ae 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -15,6 +15,7 @@
 #define LLVM_CODEGEN_MACHINEFRAMEINFO_H
 
 #include "llvm/ADT/SmallVector.h"
+//#include "llvm/ADT/IndexedMap.h"
 #include "llvm/System/DataTypes.h"
 #include <cassert>
 #include <vector>
@@ -103,10 +104,14 @@ class MachineFrameInfo {
     // protector.
     bool MayNeedSP;
 
+    // PreAllocated - If true, the object was mapped into the local frame
+    // block and doesn't need additional handling for allocation beyond that.
+    bool PreAllocated;
+
     StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM,
                 bool isSS, bool NSP)
       : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM),
-        isSpillSlot(isSS), MayNeedSP(NSP) {}
+        isSpillSlot(isSS), MayNeedSP(NSP), PreAllocated(false) {}
   };
 
   /// Objects - The list of stack objects allocated...
@@ -195,8 +200,20 @@ class MachineFrameInfo {
   ///
   const TargetFrameInfo &TFI;
 
+  /// LocalFrameObjects - References to frame indices which are mapped
+  /// into the local frame allocation block. <FrameIdx, LocalOffset>
+  SmallVector<std::pair<int, int64_t>, 32> LocalFrameObjects;
+
+  /// LocalFrameSize - Size of the pre-allocated local frame block.
+  int64_t LocalFrameSize;
+
+  /// LocalFrameBaseOffset - The base offset from the stack pointer at
+  /// function entry of the local frame blob. Set by PEI for use by
+  /// target in eliminateFrameIndex().
+  int64_t LocalFrameBaseOffset;
+
 public:
-  explicit MachineFrameInfo(const TargetFrameInfo &tfi) : TFI(tfi) {
+  explicit MachineFrameInfo(const TargetFrameInfo &tfi) : TFI(tfi) {
     StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0;
     HasVarSizedObjects = false;
     FrameAddressTaken = false;
@@ -206,6 +223,8 @@ public:
     StackProtectorIdx = -1;
     MaxCallFrameSize = 0;
     CSIValid = false;
+    LocalFrameSize = 0;
+    LocalFrameBaseOffset = 0;
   }
 
   /// hasStackObjects - Return true if there are any stack objects in this
@@ -252,6 +271,46 @@ public:
   ///
   unsigned getNumObjects() const { return Objects.size(); }
 
+  /// mapLocalFrameObject - Map a frame index into the local object block
+  void mapLocalFrameObject(int ObjectIndex, int64_t Offset) {
+    LocalFrameObjects.push_back(std::pair<int, int64_t>(ObjectIndex, Offset));
+    Objects[ObjectIndex + NumFixedObjects].PreAllocated = true;
+  }
+
+  /// getLocalFrameObjectMap - Get the local offset mapping for an object
+  std::pair<int, int64_t> getLocalFrameObjectMap(int i) {
+    assert (i >= 0 && (unsigned)i < LocalFrameObjects.size() &&
+            "Invalid local object reference!");
+    return LocalFrameObjects[i];
+  }
+
+  /// getLocalFrameObjectCount - Return the number of objects allocated into
+  /// the local object block.
+  int64_t getLocalFrameObjectCount() { return LocalFrameObjects.size(); }
+
+  /// setLocalFrameBaseOffset - Set the base SP offset of the local frame
+  /// blob.
+  void setLocalFrameBaseOffset(int64_t o) { LocalFrameBaseOffset = o; }
+
+  /// getLocalFrameBaseOffset - Get the base SP offset of the local frame
+  /// blob.
+  int64_t getLocalFrameBaseOffset() const { return LocalFrameBaseOffset; }
+
+  /// setLocalFrameSize - Set the size of the local object blob. Called by the
+  /// local stack slot allocation pass so PEI can reserve space for the block.
+  void setLocalFrameSize(int64_t sz) { LocalFrameSize = sz; }
+
+  /// getLocalFrameSize - Get the size of the local object blob.
+  int64_t getLocalFrameSize() const { return LocalFrameSize; }
+
+  /// isObjectPreAllocated - Return true if the object was pre-allocated into
+  /// the local block.
+  bool isObjectPreAllocated(int ObjectIdx) const {
+    assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    return Objects[ObjectIdx+NumFixedObjects].PreAllocated;
+  }
+
   /// getObjectSize - Return the size of the specified object.
   ///
   int64_t getObjectSize(int ObjectIdx) const {
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 3e6b3e15d22..0fa498a2063 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -198,6 +198,13 @@ namespace llvm {
   /// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow.
   FunctionPass *createSjLjEHPass(const TargetLowering *tli);
 
+  /// createLocalStackSlotAllocationPass - This pass assigns local frame
+  /// indices to stack slots relative to one another and allocates
+  /// base registers to access them when it is estimated by the target to
+  /// be out of range of normal frame pointer or stack pointer index
+  /// addressing.
+  FunctionPass *createLocalStackSlotAllocationPass();
+
 } // End llvm namespace
 
 #endif
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index a1e270e6128..a8b9419ad20 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_library(LLVMCodeGen
   LiveIntervalAnalysis.cpp
   LiveStackAnalysis.cpp
   LiveVariables.cpp
+  LocalStackSlotAllocation.cpp
   LowerSubregs.cpp
   MachineBasicBlock.cpp
   MachineCSE.cpp
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 20df99e0390..b856d9093eb 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -74,6 +74,16 @@ static cl::opt<bool> EnableMCLogging("enable-mc-api-logging", cl::Hidden,
 static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
     cl::desc("Verify generated machine code"),
     cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+// Enable or disable local stack object block allocation. This is an
+// experimental pass that allocates locals relative to one another before
+// register allocation and then assigns them to actual stack slots as a block
+// later in PEI. This will eventually allow targets with limited index offset
+// range to allocate additional base registers (not just FP and SP) to
+// more efficiently reference locals, as well as handle situations where
+// locals cannot be referenced via SP or FP at all (dynamic stack realignment
+// together with variable sized objects, for example).
+cl::opt<bool> EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(false),
+    cl::Hidden, cl::desc("Enable pre-regalloc stack frame index allocation"));
 
 static cl::opt<cl::boolOrDefault>
 AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
@@ -344,6 +354,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   if (OptLevel != CodeGenOpt::None)
     PM.add(createOptimizePHIsPass());
 
+  // Assign local variables to stack slots relative to one another and simplify
+  // frame index references where possible. Final stack slot locations will be
+  // assigned in PEI.
+  if (EnableLocalStackAlloc)
+    PM.add(createLocalStackSlotAllocationPass());
+
   if (OptLevel != CodeGenOpt::None) {
     // With optimization, dead code should already be eliminated. However
     // there is one known exception: lowered code for arguments that are only
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
new file mode 100644
index 00000000000..dabfb469c34
--- /dev/null
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -0,0 +1,171 @@
+//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns local frame indices to stack slots relative to one another
+// and allocates additional base registers to access them when the target
+// estimates they are likely to be out of range of stack pointer and frame
+// pointer relative addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "localstackalloc"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumAllocations, "Number of frame indices processed");
+
+namespace {
+  class LocalStackSlotPass: public MachineFunctionPass {
+    int64_t LocalStackSize;
+
+    void calculateFrameObjectOffsets(MachineFunction &Fn);
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit LocalStackSlotPass() : MachineFunctionPass(ID) { }
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+    const char *getPassName() const {
+      return "Local Stack Slot Allocation";
+    }
+
+  private:
+  };
+} // end anonymous namespace
+
+char LocalStackSlotPass::ID = 0;
+
+FunctionPass *llvm::createLocalStackSlotAllocationPass() {
+  return new LocalStackSlotPass();
+}
+
+bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+  calculateFrameObjectOffsets(MF);
+  DEBUG(dbgs() << LocalStackSize << " bytes of local storage pre-allocated\n");
+  return true;
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
+                  unsigned &MaxAlign) {
+  unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+  // If the alignment of this object is greater than that of the stack, then
+  // increase the stack alignment to match.
+  MaxAlign = std::max(MaxAlign, Align);
+
+  // Adjust to alignment boundary.
+  Offset = (Offset + Align - 1) / Align * Align;
+
+  DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
+        << Offset << "\n");
+  MFI->mapLocalFrameObject(FrameIdx, Offset);
+  Offset += MFI->getObjectSize(FrameIdx);
+
+  ++NumAllocations;
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
+  const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+
+  // Loop over all of the stack objects, assigning sequential addresses...
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+  int64_t Offset = 0;
+  unsigned MaxAlign = MFI->getMaxAlignment();
+
+  // Make sure that the stack protector comes before the local variables on the
+  // stack.
+  SmallSet<int, 16> LargeStackObjs;
+  if (MFI->getStackProtectorIndex() >= 0) {
+    AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset, MaxAlign);
+
+    // Assign large stack objects first.
+    for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+      if (MFI->isDeadObjectIndex(i))
+        continue;
+      if (MFI->getStackProtectorIndex() == (int)i)
+        continue;
+      if (!MFI->MayNeedStackProtector(i))
+        continue;
+
+      AdjustStackOffset(MFI, i, Offset, MaxAlign);
+      LargeStackObjs.insert(i);
+    }
+  }
+
+  // Then assign frame offsets to stack objects that are not used to spill
+  // callee saved registers.
+  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+    if (MFI->isDeadObjectIndex(i))
+      continue;
+    if (MFI->getStackProtectorIndex() == (int)i)
+      continue;
+    if (LargeStackObjs.count(i))
+      continue;
+
+    AdjustStackOffset(MFI, i, Offset, MaxAlign);
+  }
+
+  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+  if (!RegInfo->targetHandlesStackFrameRounding()) {
+    // If we have reserved argument space for call sites in the function
+    // immediately on entry to the current function, count it as part of the
+    // overall stack size.
+    if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn))
+      Offset += MFI->getMaxCallFrameSize();
+
+    // Round up the size to a multiple of the alignment. If the function has
+    // any calls or alloca's, align to the target's StackAlignment value to
+    // ensure that the callee's frame or the alloca data is suitably aligned;
+    // otherwise, for leaf functions, align to the TransientStackAlignment
+    // value.
+    unsigned StackAlign;
+    if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+        (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
+      StackAlign = TFI.getStackAlignment();
+    else
+      StackAlign = TFI.getTransientStackAlignment();
+
+    // If the frame pointer is eliminated, all frame offsets will be relative to
+    // SP not FP. Align to MaxAlign so this works.
+    StackAlign = std::max(StackAlign, MaxAlign);
+    unsigned AlignMask = StackAlign - 1;
+    Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+  }
+
+  // Remember how big this blob of stack space is, both for this pass's debug
+  // output and in the frame info so PEI can reserve room for the block.
+  LocalStackSize = Offset;
+  MFI->setLocalFrameSize(Offset);
+}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index cba92fad12b..8f1d3a6da61 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -19,6 +19,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "pei"
 #include "PrologEpilogInserter.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -32,6 +33,7 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/STLExtras.h"
@@ -39,6 +41,10 @@
 
 using namespace llvm;
 
+// FIXME: For testing purposes only. Remove once the pre-allocation pass
+// is done.
+extern cl::opt<bool> EnableLocalStackAlloc;
+
 char PEI::ID = 0;
 
 INITIALIZE_PASS(PEI, "prologepilog",
@@ -462,8 +468,10 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
   Offset = (Offset + Align - 1) / Align * Align;
 
   if (StackGrowsDown) {
+    DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
     MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
   } else {
+    DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
     MFI->setObjectOffset(FrameIdx, Offset);
     Offset += MFI->getObjectSize(FrameIdx);
   }
@@ -548,6 +556,26 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
     AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
   }
 
+  // Store the offset of the start of the local allocation block. This
+  // will be used later when resolving frame base virtual register pseudos.
+  MFI->setLocalFrameBaseOffset(Offset);
+  if (EnableLocalStackAlloc) {
+    // Allocate the local block
+    Offset += MFI->getLocalFrameSize();
+
+    // Resolve offsets for objects in the local block.
+    for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+      std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+      int64_t FIOffset = MFI->getLocalFrameBaseOffset() + Entry.second;
+
+      AdjustStackOffset(MFI, Entry.first, StackGrowsDown, FIOffset, MaxAlign);
+    }
+  }
+  // FIXME: Allocate locals. Once the block allocation pass is turned on,
+  // this simplifies to just the second loop, since all of the large objects
+  // will have already been handled. The second loop can also simplify a
+  // bit, as the conditionals inside aren't all necessary.
+
   // Make sure that the stack protector comes before the local variables on the
   // stack.
   SmallSet<int, 16> LargeStackObjs;
@@ -557,6 +585,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
 
     // Assign large stack objects first.
     for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+      if (MFI->isObjectPreAllocated(i))
+        continue;
       if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
         continue;
       if (RS && (int)i == RS->getScavengingFrameIndex())
@@ -576,6 +606,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
   // Then assign frame offsets to stack objects that are not used to spill
   // callee saved registers.
   for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+    if (MFI->isObjectPreAllocated(i))
+      continue;
     if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
       continue;
     if (RS && (int)i == RS->getScavengingFrameIndex())