diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 564f9a49443..d3e1839d00e 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -14,9 +14,34 @@ // This pass must be run after register allocation. After this pass is // executed, it is illegal to construct MO_FrameIndex operands. // +// This pass implements a shrink wrapping variant of prolog/epilog insertion: +// - Places callee saved register (CSR) spills and restores in the CFG to +// tightly surround uses so that execution paths that do not use CSRs do not +// pay the spill/restore penalty. +// +// - Avoiding placment of spills/restores in loops: if a CSR is used inside a +// loop(nest), the spills are placed in the loop preheader, and restores are +// placed in the loop exit nodes (the successors of the loop _exiting_ nodes). +// +// - Covering paths without CSR uses: e.g. if a restore is placed in a join +// block, a matching spill is added to the end of all immediate predecessor +// blocks that are not reached by a spill. Similarly for saves placed in +// branch blocks. +// +// Shrink wrapping uses an analysis similar to the one in GVNPRE to determine +// which basic blocks require callee-saved register save/restore code. +// +// This pass uses MachineDominators and MachineLoopInfo. Loop information +// is used to prevent shrink wrapping of callee-saved register save/restore +// code into loops. +// //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "shrink-wrap" + #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -27,11 +52,24 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/STLExtras.h" #include +#include + using namespace llvm; +// Shrink Wrapping: +static cl::opt +ShrinkWrapping("shrink-wrap", + cl::desc("Apply shrink wrapping to callee-saved register spills/restores")); + namespace { struct VISIBILITY_HIDDEN PEI : public MachineFunctionPass { static char ID; @@ -42,8 +80,13 @@ namespace { } virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); + AU.setPreservesCFG(); + if (ShrinkWrapping) { + AU.addRequired(); + AU.addRequired(); + } + AU.addPreserved(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -69,8 +112,16 @@ namespace { // information and eliminates call frame pseudo instructions. calculateCalleeSavedRegisters(Fn); + // Determine placement of CSR spill/restore code: + // - with shrink wrapping, place spills and restores to tightly + // enclose regions in the Machine CFG of the function where + // they are used. Without shrink wrapping + // - default (no shrink wrapping), place all spills in the + // entry block, all restores in return blocks. + placeCSRSpillsAndRestores(Fn); + // Add the code to save and restore the callee saved registers - saveCalleeSavedRegisters(Fn); + insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function // before the frame layout is finalized. @@ -94,7 +145,7 @@ namespace { delete RS; return true; } - + private: RegScavenger *RS; @@ -102,22 +153,702 @@ namespace { // stack frame indexes. unsigned MinCSFrameIndex, MaxCSFrameIndex; + // Analysis info for spill/restore placement. + // "CSR": "callee saved register". + + // CSRegSet contains indices into the Callee Saved Register Info + // vector built by calculateCalleeSavedRegisters() and accessed + // via MF.getFrameInfo()->getCalleeSavedInfo(). + typedef SparseBitVector<> CSRegSet; + + // CSRegBlockMap maps MachineBasicBlocks to sets of callee + // saved register indices. + typedef DenseMap CSRegBlockMap; + + // Set and maps for computing CSR spill/restore placement: + // used in function (UsedCSRegs) + // used in a basic block (CSRUsed) + // anticipatable in a basic block (Antic{In,Out}) + // available in a basic block (Avail{In,Out}) + // to be spilled at the entry to a basic block (CSRSave) + // to be restored at the end of a basic block (CSRRestore) + + CSRegSet UsedCSRegs; + CSRegBlockMap CSRUsed; + CSRegBlockMap AnticIn, AnticOut; + CSRegBlockMap AvailIn, AvailOut; + CSRegBlockMap CSRSave; + CSRegBlockMap CSRRestore; + + // Entry and return blocks of the current function. + MachineBasicBlock* EntryBlock; + SmallVector ReturnBlocks; + + // Flag to control shrink wrapping per-function: + // may choose to skip shrink wrapping for certain + // functions. + bool ShrinkWrapThisFunction; + + bool calculateSets(MachineFunction &Fn); + void calculateAnticAvail(MachineFunction &Fn); + MachineBasicBlock* moveSpillsOutOfLoops(MachineFunction &Fn, + MachineBasicBlock* MBB); + void addRestoresForSBranchBlock(MachineFunction &Fn, + MachineBasicBlock* MBB); + void moveRestoresOutOfLoops(MachineFunction& Fn, + MachineBasicBlock* MBB, + std::vector& SBLKS); + void addSavesForRJoinBlocks(MachineFunction& Fn, + std::vector& SBLKS); + void placeSpillsAndRestores(MachineFunction &Fn); + void placeCSRSpillsAndRestores(MachineFunction &Fn); void calculateCalleeSavedRegisters(MachineFunction &Fn); - void saveCalleeSavedRegisters(MachineFunction &Fn); + void insertCSRSpillsAndRestores(MachineFunction &Fn); void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); void insertPrologEpilogCode(MachineFunction &Fn); + + // Initialize all shrink wrapping data. + void initShrinkWrappingInfo() { + UsedCSRegs.clear(); + CSRUsed.clear(); + AnticIn.clear(); + AnticOut.clear(); + AvailIn.clear(); + AvailOut.clear(); + CSRSave.clear(); + CSRRestore.clear(); + EntryBlock = 0; + if (! ReturnBlocks.empty()) + ReturnBlocks.clear(); + ShrinkWrapThisFunction = ShrinkWrapping; + } + + // Convienences for dealing with machine loops. + MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP) { + assert(LP && "Machine loop is NULL."); + MachineBasicBlock* PHDR = LP->getLoopPreheader(); + MachineLoop* PLP = LP->getParentLoop(); + while (PLP) { + PHDR = PLP->getLoopPreheader(); + PLP = PLP->getParentLoop(); + } + return PHDR; + } + + MachineLoop* getTopLevelLoopParent(MachineLoop *LP) { + if (LP == 0) + return 0; + MachineLoop* PLP = LP->getParentLoop(); + while (PLP) { + LP = PLP; + PLP = PLP->getParentLoop(); + } + return LP; + } + +#ifndef NDEBUG + // Debugging methods. + static std::string getBasicBlockName(const MachineBasicBlock* MBB) { + std::ostringstream name; + if (MBB) { + if (MBB->getBasicBlock()) + name << MBB->getBasicBlock()->getName(); + else + name << "_MBB_" << MBB->getNumber(); + } + return name.str(); + } + + static std::string stringifyCSRegSet(const CSRegSet& s, + MachineFunction &Fn) { + const TargetRegisterInfo* TRI = Fn.getTarget().getRegisterInfo(); + const std::vector CSI = + Fn.getFrameInfo()->getCalleeSavedInfo(); + + std::ostringstream srep; + if (CSI.size() == 0) { + srep << "[]"; + return srep.str(); + } + srep << "["; + CSRegSet::iterator I = s.begin(), E = s.end(); + if (I != E) { + unsigned reg = CSI[*I].getReg(); + srep << TRI->getName(reg); + for (++I; I != E; ++I) { + reg = CSI[*I].getReg(); + srep << ","; + srep << TRI->getName(reg); + } + } + srep << "]"; + return srep.str(); + } + + static void dumpSet(const CSRegSet& s, MachineFunction &Fn) { + DOUT << stringifyCSRegSet(s, Fn) << "\n"; + } +#endif + }; char PEI::ID = 0; } - /// createPrologEpilogCodeInserter - This function returns a pass that inserts /// prolog and epilog code, and eliminates abstract frame references. /// FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } +/// placeCSRSpillsAndRestores - determine which MBBs of the function +/// need save, restore code for callee-saved registers by doing a DF analysis +/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs +/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo +/// is used to ensure that CSR save/restore code is not placed inside loops. +/// This function computes the maps of MBBs -> CSRs to spill and restore +/// in CSRSave, CSRRestore. +/// +/// If shrink wrapping is not being performed, place all spills in +/// the entry block, all restores in return blocks. In this case, +/// CSRSave has a single mapping, CSRRestore has mappings for each +/// return block. +/// +void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { + +#ifndef NDEBUG + DOUT << "Place CSR spills/restores for " + << Fn.getFunction()->getName() << "\n"; +#endif + + initShrinkWrappingInfo(); + + if (calculateSets(Fn)) + placeSpillsAndRestores(Fn); +} + +/// calculateAnticAvail - helper for computing the data flow +/// sets required for determining spill/restore placements. +/// +void PEI::calculateAnticAvail(MachineFunction &Fn) { + + // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. + bool changed = true; + unsigned iterations = 0; + while (changed) { + changed = false; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + + // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCC(MBB)) + MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); + if (SI != SE) { + CSRegSet prevAnticOut = AnticOut[MBB]; + MachineBasicBlock* SUCC = *SI; + AnticOut[MBB] = AnticIn[SUCC]; + for (++SI; SI != SE; ++SI) { + SUCC = *SI; + AnticOut[MBB] &= AnticIn[SUCC]; + } + if (prevAnticOut != AnticOut[MBB]) + changed = true; + } + // AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; + CSRegSet prevAnticIn = AnticIn[MBB]; + AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; + if (prevAnticIn |= AnticIn[MBB]) + changed = true; + + // AvailIn[MBB] = INTERSECT(AvailOut[S] for S in PRED(MBB)) + MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); + if (PI != PE) { + CSRegSet prevAvailIn = AvailIn[MBB]; + MachineBasicBlock* PRED = *PI; + AvailIn[MBB] = AvailOut[PRED]; + for (++PI; PI != PE; ++PI) { + PRED = *PI; + AvailIn[MBB] &= AvailOut[PRED]; + } + if (prevAvailIn != AvailIn[MBB]) + changed = true; + } + // AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; + CSRegSet prevAvailOut = AvailOut[MBB]; + AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; + if (prevAvailOut |= AvailOut[MBB]) + changed = true; + } + ++iterations; + } + + // EXP + AnticIn[EntryBlock].clear(); + AnticOut[EntryBlock].clear(); + +#ifndef NDEBUG + DOUT << "-----------------------------------------------------------\n"; + DOUT << "iterations = " << iterations << "\n"; + DOUT << "-----------------------------------------------------------\n"; + DOUT << "MBB | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"; + DOUT << "-----------------------------------------------------------\n"; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + + DOUT << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(AnticIn[MBB], Fn) + << " | " + << stringifyCSRegSet(AnticOut[MBB], Fn) + << " | " + << stringifyCSRegSet(AvailIn[MBB], Fn) + << " | " + << stringifyCSRegSet(AvailOut[MBB], Fn) + << "\n"; + } +#endif +} + +/// calculateSets - helper function for placeCSRSpillsAndRestores, +/// collect the CSRs used in this function, develop the DF sets that +/// describe the minimal regions in the Machine CFG around which spills, +/// restores must be placed. +/// +/// This function decides if shrink wrapping should actually be done: +/// if all CSR uses are in the entry block, no shrink wrapping is possible, +/// so ShrinkWrapping is turned off (for the current function) and the +/// function returns false. +/// +bool PEI::calculateSets(MachineFunction &Fn) { + + // Sets used to compute spill, restore placement sets. + const std::vector CSI = + Fn.getFrameInfo()->getCalleeSavedInfo(); + + // If no CSRs used, we are done. + if (CSI.empty()) { +#ifndef NDEBUG + DOUT << Fn.getFunction()->getName() + << " uses no callee-saved registers.\n"; +#endif + return false; + } + +#ifndef NDEBUG + DOUT << "-----------------------------------------------------------\n"; +#endif + + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + bool allCSRUsesInEntryBlock = true; + + // Initialize UsedCSRegs set, CSRUsed map. + // At the same time, put entry block directly into + // CSRSave, CSRRestore sets if any CSRs are used. + // + // Quick exit option (not implemented): + // Given N CSR uses in entry block, + // revert to default behavior, skip the placement + // step and put all saves in entry, restores in + // return blocks. + + // Set up entry and return blocks. + EntryBlock = Fn.begin(); + for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); + MBB != E; ++MBB) + if (!MBB->empty() && MBB->back().getDesc().isReturn()) + ReturnBlocks.push_back(MBB); + + // TODO -- check for a use of a CSR in each imm. successor of EntryBlock, + // do not shrink wrap this function if this is the case. + + // If not shrink wrapping (this function) at this point, set bits in + // CSR{Save,Restore}[] and UsedCSRegs, then return. + if (! ShrinkWrapThisFunction) { + for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { + UsedCSRegs.set(inx); + CSRSave[EntryBlock].set(inx); + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) + CSRRestore[ReturnBlocks[ri]].set(inx); + } + return false; + } + + // Walk instructions in all MBBs, create basic sets, choose + // whether or not to shrink wrap this function. + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { + for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { + unsigned Reg = CSI[inx].getReg(); + // If instruction I reads or modifies Reg, add it to UsedCSRegs, + // CSRUsed map for the current block. + for (unsigned opInx = 0, opEnd = I->getNumOperands(); + opInx != opEnd; ++opInx) { + const MachineOperand &MO = I->getOperand(opInx); + if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MOReg == Reg || + (TargetRegisterInfo::isPhysicalRegister(MOReg) && + TargetRegisterInfo::isPhysicalRegister(Reg) && + TRI->isSubRegister(MOReg, Reg))) { + // CSR Reg is defined/used in block MBB. + UsedCSRegs.set(inx); + CSRUsed[MBB].set(inx); + // Short-circuit analysis for entry, return blocks: + // if a CSR is used in the entry block, add it directly + // to CSRSave[EntryBlock] and to CSRRestore[R] for R + // in ReturnBlocks. Note CSR uses in non-entry blocks. + if (ShrinkWrapThisFunction) { + if (MBB == EntryBlock) { + CSRSave[MBB].set(inx); + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) + CSRRestore[ReturnBlocks[ri]].set(inx); + } else + allCSRUsesInEntryBlock = false; + } else { + // Not shrink wrapping => ensure saves/restores are correctly + // added for entry, return blocks. + CSRSave[EntryBlock].set(inx); + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) + CSRRestore[ReturnBlocks[ri]].set(inx); + } + } + } + } + } +#ifndef NDEBUG + DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRUsed[MBB], Fn) << "\n"; +#endif + } + +#ifndef NDEBUG + DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs, Fn) << "\n"; +#endif + + // Early exit: + // 1. Not asked to do shrink wrapping => just "place" all spills(restores) + // in the entry(return) block(s), already done above. + // 2. All CSR uses in entry block => same as case 1, but say we will + // not shrink wrap the current function. + ShrinkWrapThisFunction = (ShrinkWrapping && + ShrinkWrapThisFunction && + ! allCSRUsesInEntryBlock); + if (! ShrinkWrapThisFunction) { + return false; + } + + calculateAnticAvail(Fn); + + return true; +} + +/// moveSpillsOutOfLoops - helper for placeSpillsAndRestores() which +/// relocates a spill from a subgraph in a loop to the loop preheader. +/// Returns the MBB to which saves have been moved, or the given MBB +/// if it is a branch point. +/// +MachineBasicBlock* PEI::moveSpillsOutOfLoops(MachineFunction &Fn, + MachineBasicBlock* MBB) { + if (MBB == 0 || CSRSave[MBB].empty()) + return 0; + + // Block to which saves are moved. + MachineBasicBlock* DEST = 0; + MachineLoopInfo &LI = getAnalysis(); + + if (MachineLoop* LP = LI.getLoopFor(MBB)) { + MachineBasicBlock* LPH = getTopLevelLoopPreheader(LP); + assert(LPH && "Loop has no top level preheader?"); + +#ifndef NDEBUG + DOUT << "Moving saves of " + << stringifyCSRegSet(CSRSave[MBB], Fn) + << " from " << getBasicBlockName(MBB) + << " to " << getBasicBlockName(LPH) << "\n"; +#endif + // Add CSRegSet from MBB to LPH, empty out MBB's CSRegSet. + CSRSave[LPH] |= CSRSave[MBB]; + // If saves moved to entry block, add restores to returns. + if (LPH == EntryBlock) { + for (unsigned i = 0, e = ReturnBlocks.size(); i != e; ++i) + CSRRestore[ReturnBlocks[i]] |= CSRSave[MBB]; + } else { + // Remember where we moved the save so we can add + // restores on successor paths if necessary. + if (LPH->succ_size() > 1) + DEST = LPH; + } + CSRSave[MBB].clear(); + } else if (MBB->succ_size() > 1) + DEST = MBB; + return DEST; +} + +/// addRestoresForSBranchBlock - helper for placeSpillsAndRestores() which +/// adds restores of CSRs saved in branch point MBBs to the front of any +/// successor blocks connected to regions with no uses of the saved CSRs. +/// +void PEI::addRestoresForSBranchBlock(MachineFunction &Fn, + MachineBasicBlock* MBB) { + + if (MBB == 0 || CSRSave[MBB].empty() || MBB->succ_size() < 2) + return; + + // Add restores of CSRs saved in branch point MBBs to the + // front of any succ blocks flowing into regions that + // have no uses of MBB's CSRs. + bool hasCSRUses = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + bool needsRestore = false; + if (CSRUsed[SUCC].intersects(CSRSave[MBB])) { + hasCSRUses = true; + continue; + } + needsRestore = true; + for (df_iterator BI = df_begin(SUCC), + BE = df_end(SUCC); BI != BE; ++BI) { + MachineBasicBlock* SBB = *BI; + if (CSRUsed[SBB].intersects(CSRSave[MBB])) { + hasCSRUses = true; + needsRestore = false; + break; + } + } + // Additional restores are needed for SUCC iff there is at least + // one CSR use reachable from the successors of MBB and there + // are no uses in or below SUCC. + if (needsRestore && hasCSRUses) { +#ifndef NDEBUG + DOUT << "MBB " << getBasicBlockName(MBB) + << " needs a restore on path to successor " + << getBasicBlockName(SUCC) << "\n"; +#endif + // Add restores to SUCC for all CSRs saved in MBB... + CSRRestore[SUCC] = CSRSave[MBB]; + } + } +} + +/// moveRestoresOutOfLoops - helper for placeSpillsAndRestores() which +/// relocates restores from a subgraph in a loop to the loop exit blocks. +/// This function records the MBBs to which restores have been moved in +/// SBLKS. If no restores are moved, SBLKS contains the input MBB if it +/// is a join point in the Machine CFG. +/// +void PEI::moveRestoresOutOfLoops(MachineFunction& Fn, + MachineBasicBlock* MBB, + std::vector& SBLKS) { + + SBLKS.clear(); + if (MBB == 0 || CSRRestore[MBB].empty()) + return; + + MachineLoopInfo &LI = getAnalysis(); + + if (MachineLoop* LP = LI.getLoopFor(MBB)) { + LP = getTopLevelLoopParent(LP); + assert(LP && "Loop with no top level parent?"); + + SmallVector exitBlocks; + + LP->getExitBlocks(exitBlocks); + assert(exitBlocks.size() > 0 && + "Loop has no top level exit blocks?"); + for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { + MachineBasicBlock* EXB = exitBlocks[i]; + +#ifndef NDEBUG + DOUT << "Moving restores of " + << stringifyCSRegSet(CSRRestore[MBB], Fn) + << " from " << getBasicBlockName(MBB) + << " to " << getBasicBlockName(EXB) << "\n"; +#endif + + // Add CSRegSet from MBB to LPE, empty out MBB's CSRegSet. + CSRRestore[EXB] |= CSRRestore[MBB]; + if (EXB->pred_size() > 1) + SBLKS.push_back(EXB); + } + CSRRestore[MBB].clear(); + } else if (MBB->pred_size() > 1) + SBLKS.push_back(MBB); +} + +/// addSavesForRJoinBlocks - Add saves of CSRs restored in join point MBBs +/// to the ends of any pred blocks that flow into MBB from regions that +/// have no uses of MBB's CSRs. +/// +void PEI::addSavesForRJoinBlocks(MachineFunction& Fn, + std::vector& SBLKS) { + + if (SBLKS.empty()) + return; + + for (unsigned i = 0, e = SBLKS.size(); i != e; ++i) { + MachineBasicBlock* MBB = SBLKS[i]; + if (MBB->pred_size() > 1) { + bool needsSave = false; + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + + // Walk back up in the CFG from the preds of MBB, look for + // a block that uses any CSR that is restored in MBB. + if (CSRUsed[PRED].intersects(CSRRestore[MBB])) + continue; + needsSave = true; + for (idf_iterator PPI = idf_begin(PRED), + PPE = idf_end(PRED); PPI != PPE; ++PPI) { + MachineBasicBlock* PBB = *PPI; + if (CSRUsed[PBB].intersects(CSRRestore[MBB])) { + needsSave = false; + break; + } + } + if (needsSave) { + // Add saves to PRED for all CSRs restored in MBB... +#ifndef NDEBUG + DOUT << "MBB " << getBasicBlockName(MBB) + << " needs a save on path from predecessor " + << getBasicBlockName(PRED) << "\n"; +#endif + CSRSave[PRED] = CSRRestore[MBB]; + } + } + } + } +} + +/// placeSpillsAndRestores - decide which MBBs need spills, restores +/// of CSRs. +/// +void PEI::placeSpillsAndRestores(MachineFunction &Fn) { + +#ifndef NDEBUG + DOUT << "-----------------------------------------------------------\n"; +#endif + + // Calculate CSR{Save,Restore} using Antic, Avail on the Machine-CFG. + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + // Entry block saves are recorded in UsedCSRegs pass above. + if (MBB != EntryBlock) { + // Intersect (CSRegs - AnticIn[P]) for all predecessors P of MBB + CSRegSet anticInPreds; + MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); + if (PI != PE) { + MachineBasicBlock* PRED = *PI; + anticInPreds = UsedCSRegs - AnticIn[PRED]; + for (++PI; PI != PE; ++PI) { + PRED = *PI; + // Handle self loop. + if (PRED != MBB) + anticInPreds &= (UsedCSRegs - AnticIn[PRED]); + } + } + // CSRSave[MBB] = (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds + CSRSave[MBB] = (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; + + // Remove the CSRs that are saved in the entry block + if (! CSRSave[MBB].empty() && ! CSRSave[EntryBlock].empty()) + CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock]; + + // Move saves inside loops to the preheaders of the outermost + // containing loops, add restores to blocks reached by saves + // placed at branch points where necessary. + if (MachineBasicBlock* DESTBB = moveSpillsOutOfLoops(Fn, MBB)) { + // Add restores to blocks reached by saves placed at branch + // points where necessary. + addRestoresForSBranchBlock(Fn, DESTBB); + } + } + +#ifndef NDEBUG + if (! CSRSave[MBB].empty()) + DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB], Fn) << "\n"; +#endif + + // Compute CSRRestore, which may already be set for return blocks. + if (! CSRRestore[MBB].empty() || MBB->pred_size() == 0) + continue; + + // Intersect (CSRegs - AvailOut[S]) for all successors S of MBB + CSRegSet availOutSucc; + MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); + if (SI != SE) { + MachineBasicBlock* SUCC = *SI; + availOutSucc = UsedCSRegs - AvailOut[SUCC]; + for (++SI; SI != SE; ++SI) { + SUCC = *SI; + // Handle self loop. + if (SUCC != MBB) + availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); + } + } else if (! CSRUsed[MBB].empty()) { + // Take care of uses in return blocks (which have no successors). + availOutSucc = UsedCSRegs; + } + // CSRRestore[MBB] = (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc + CSRRestore[MBB] = (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; + + // Remove the CSRs that are restored in the return blocks. + // Lest this be confusing, note that: + // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. + if (! CSRRestore[MBB].empty() && ! CSRSave[EntryBlock].empty()) + CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; + + // Move restores inside loops to the exits of the outermost (top level) + // containing loops. + std::vector saveBlocks; + moveRestoresOutOfLoops(Fn, MBB, saveBlocks); + + // Add saves of CSRs restored in join point MBBs to the ends + // of any pred blocks that flow into MBB from regions that + // have no uses of MBB's CSRs. + addSavesForRJoinBlocks(Fn, saveBlocks); + +#ifndef NDEBUG + if (! CSRRestore[MBB].empty()) + DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB], Fn) << "\n"; +#endif + } + +#ifndef NDEBUG + DOUT << "-----------------------------------------------------------\n"; + DOUT << "Final SAVE, RESTORE:\n"; + DOUT << "-----------------------------------------------------------\n"; + for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); + MBB != E; ++MBB) { + if (! CSRSave[MBB].empty()) { + DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB], Fn); + if (CSRRestore[MBB].empty()) + DOUT << "\n"; + } + if (! CSRRestore[MBB].empty()) { + if (! CSRSave[MBB].empty()) + DOUT << " "; + DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB], Fn) << "\n"; + } + } +#endif +} + /// calculateCalleeSavedRegisters - Scan the function for modified callee saved /// registers. Also calculate the MaxCallFrameSize and HasCalls variables for /// the function's frame information and eliminates call frame pseudo @@ -221,7 +952,8 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { unsigned Align = RC->getAlignment(); unsigned StackAlign = TFI->getStackAlignment(); // We may not be able to sastify the desired alignment specification of - // the TargetRegisterClass if the stack alignment is smaller. Use the min. + // the TargetRegisterClass if the stack alignment is smaller. + // Use the min. Align = std::min(Align, StackAlign); FrameIdx = FFI->CreateStackObject(RC->getSize(), Align); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; @@ -236,41 +968,104 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { FFI->setCalleeSavedInfo(CSI); } -/// saveCalleeSavedRegisters - Insert spill code for any callee saved registers -/// that are modified in the function. +/// insertCSRSpillsAndRestores - Insert spill and restore code for +/// callee saved registers used in the function, handling shrink wrapping. /// -void PEI::saveCalleeSavedRegisters(MachineFunction &Fn) { +void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Get callee saved register information. MachineFrameInfo *FFI = Fn.getFrameInfo(); const std::vector &CSI = FFI->getCalleeSavedInfo(); - + // Early exit if no callee saved registers are modified! if (CSI.empty()) return; const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); + MachineBasicBlock::iterator I; + std::vector blockCSI; - // Now that we have a stack slot for each register to be saved, insert spill - // code into the entry block. - MachineBasicBlock *MBB = Fn.begin(); - MachineBasicBlock::iterator I = MBB->begin(); +#ifndef NDEBUG + DOUT << "Inserting spill/restore code for CSRs in function " + << Fn.getFunction()->getName() << "\n"; +#endif - if (!TII.spillCalleeSavedRegisters(*MBB, I, CSI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Add the callee-saved register as live-in. It's killed at the spill. - MBB->addLiveIn(CSI[i].getReg()); + // Insert spills. + for (CSRegBlockMap::iterator + BI = CSRSave.begin(), BE = CSRSave.end(); BI != BE; ++BI) { + MachineBasicBlock* MBB = BI->first; + CSRegSet save = BI->second; - // Insert the spill to the stack frame. - TII.storeRegToStackSlot(*MBB, I, CSI[i].getReg(), true, - CSI[i].getFrameIdx(), CSI[i].getRegClass()); + if (save.empty()) + continue; + + if (! ShrinkWrapThisFunction) { + // Spill using target interface. + I = MBB->begin(); + if (!TII.spillCalleeSavedRegisters(*MBB, I, CSI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. It's killed at the spill. + MBB->addLiveIn(CSI[i].getReg()); + + // Insert the spill to the stack frame. + TII.storeRegToStackSlot(*MBB, I, CSI[i].getReg(), true, + CSI[i].getFrameIdx(), CSI[i].getRegClass()); + } + } + } else { +#ifndef NDEBUG + DOUT << "CSRSave[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(save, Fn) << "\n"; +#endif + + blockCSI.clear(); + for (CSRegSet::iterator RI = save.begin(), + RE = save.end(); RI != RE; ++RI) { + blockCSI.push_back(CSI[*RI]); + } + assert(blockCSI.size() > 0 && + "Could not collect callee saved register info"); + + // If MBB has no uses of CSRs being saved, this means saves + // must be inserted at the _end_. + if (! MBB->empty() && ! CSRUsed[MBB].intersects(save)) { + I = MBB->end(); + --I; + if (I->getDesc().isCall()) { + ++I; + } else { + MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + I = I2; + } + } else { + I = MBB->begin(); + } + + // When shrink wrapping, use stack slot stores/loads. + for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. + // It's killed at the spill. + MBB->addLiveIn(blockCSI[i].getReg()); + + // Insert the spill to the stack frame. + TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(), + true, + blockCSI[i].getFrameIdx(), + blockCSI[i].getRegClass()); + } } } + // Use CSRRestore to add code to restore the callee-saved registers in + // each block. + for (CSRegBlockMap::iterator + BI = CSRRestore.begin(), BE = CSRRestore.end(); BI != BE; ++BI) { + MachineBasicBlock* MBB = BI->first; + CSRegSet restore = BI->second; - // Add code to restore the callee-save registers in each exiting block. - for (MachineFunction::iterator FI = Fn.begin(), E = Fn.end(); FI != E; ++FI) - // If last instruction is a return instruction, add an epilogue. - if (!FI->empty() && FI->back().getDesc().isReturn()) { - MBB = FI; + if (restore.empty()) + continue; + if (! ShrinkWrapThisFunction) { + // Restore using target interface. I = MBB->end(); --I; // Skip over all terminator instructions, which are part of the return @@ -283,18 +1078,18 @@ void PEI::saveCalleeSavedRegisters(MachineFunction &Fn) { MachineBasicBlock::iterator BeforeI = I; if (!AtStart) --BeforeI; - - // Restore all registers immediately before the return and any terminators - // that preceed it. + + // Restore all registers immediately before the return and any + // terminators that preceed it. if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(), - CSI[i].getFrameIdx(), - CSI[i].getRegClass()); + CSI[i].getFrameIdx(), + CSI[i].getRegClass()); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); - // Insert in reverse order. loadRegFromStackSlot can insert multiple - // instructions. + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. if (AtStart) I = MBB->begin(); else { @@ -303,7 +1098,79 @@ void PEI::saveCalleeSavedRegisters(MachineFunction &Fn) { } } } + } else { +#ifndef NDEBUG + DOUT << "CSRRestore[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(restore, Fn) << "\n"; +#endif + + blockCSI.clear(); + for (CSRegSet::iterator RI = restore.begin(), + RE = restore.end(); RI != RE; ++RI) { + blockCSI.push_back(CSI[*RI]); + } + assert(blockCSI.size() > 0 && + "Could not find callee saved register info"); + + // If MBB uses no CSRs but has restores, this means + // it must have restores inserted at the _beginning_. + // N.B. -- not necessary if edge splitting done. + if (MBB->empty() || ! CSRUsed[MBB].intersects(restore)) { + I = MBB->begin(); + } else { + I = MBB->end(); + --I; + + // EXP iff spill/restore implemented with push/pop: + // append restore to block unless it ends in a + // barrier terminator instruction. + + // Skip over all terminator instructions, which are part of the + // return sequence. + if (I->getDesc().isCall()) { + ++I; + } else { + MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + I = I2; + } + } + + bool AtStart = I == MBB->begin(); + MachineBasicBlock::iterator BeforeI = I; + if (!AtStart) + --BeforeI; + +#ifndef NDEBUG + if (! MBB->empty() && ! CSRUsed[MBB].intersects(restore)) { + MachineInstr* MI = BeforeI; + DOUT << "adding restore after "; + DEBUG(MI->dump()); + } else { + DOUT << "adding restore to beginning of " + << getBasicBlockName(MBB) << "\n"; + } +#endif + + // Restore all registers immediately before the return and any + // terminators that preceed it. + for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { + TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(), + blockCSI[i].getFrameIdx(), + blockCSI[i].getRegClass()); + assert(I != MBB->begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + if (AtStart) + I = MBB->begin(); + else { + I = BeforeI; + ++I; + } + } } + } } /// AdjustStackOffset - Helper function used to adjust the stack frame offset. @@ -358,8 +1225,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. - // We currently don't support filling in holes in between fixed sized objects, - // so we adjust 'Offset' to point to the end of last fixed sized + // We currently don't support filling in holes in between fixed sized + // objects, so we adjust 'Offset' to point to the end of last fixed sized // preallocated object. for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; @@ -385,8 +1252,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { Offset += FFI->getObjectSize(i); unsigned Align = FFI->getObjectAlignment(i); - // If the alignment of this object is greater than that of the stack, then - // increase the stack alignment to match. + // If the alignment of this object is greater than that of the stack, + // then increase the stack alignment to match. MaxAlign = std::max(MaxAlign, Align); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; @@ -397,8 +1264,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = FFI->getObjectAlignment(i); - // If the alignment of this object is greater than that of the stack, then - // increase the stack alignment to match. + // If the alignment of this object is greater than that of the stack, + // then increase the stack alignment to match. MaxAlign = std::max(MaxAlign, Align); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; @@ -453,7 +1320,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // offsets will be relative to SP not FP; round up the stack size so this // works. if (!RegInfo->targetHandlesStackFrameRounding() && - (FFI->hasCalls() || FFI->hasVarSizedObjects() || + (FFI->hasCalls() || FFI->hasVarSizedObjects() || (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0))) { // If we have reserved argument space for call sites in the function @@ -509,9 +1376,11 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); - for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { int SPAdj = 0; // SP offset due to call frame setup / destroy. if (RS) RS->enterBasicBlock(BB); + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { if (I->getOpcode() == TargetInstrInfo::DECLARE) { // Ignore it. @@ -560,6 +1429,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. + TRI.eliminateFrameIndex(MI, SPAdj, RS); // Reset the iterator if we were at the beginning of the BB.