diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index c80f6dcdb51..0cddff8d9da 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -392,6 +392,10 @@ namespace llvm { /// into tails of their predecessors. extern char &TailDuplicateID; + /// MachineTraceMetrics - This pass computes critical path and CPU resource + /// usage in an ensemble of traces. + extern char &MachineTraceMetricsID; + /// EarlyIfConverter - This pass performs if-conversion on SSA form by /// inserting cmov instructions. extern char &EarlyIfConverterID; diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index e6fa8c3d304..de97957a84c 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -172,6 +172,7 @@ void initializeMachineLoopRangesPass(PassRegistry&); void initializeMachineModuleInfoPass(PassRegistry&); void initializeMachineSchedulerPass(PassRegistry&); void initializeMachineSinkingPass(PassRegistry&); +void initializeMachineTraceMetricsPass(PassRegistry&); void initializeMachineVerifierPassPass(PassRegistry&); void initializeMemCpyOptPass(PassRegistry&); void initializeMemDepPrinterPass(PassRegistry&); diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index d240389d7c5..2e189ad7e7d 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -61,6 +61,7 @@ add_llvm_library(LLVMCodeGen MachineSSAUpdater.cpp MachineScheduler.cpp MachineSink.cpp + MachineTraceMetrics.cpp MachineVerifier.cpp OcamlGC.cpp OptimizePHIs.cpp diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 9840a402804..cfe3e9d7e1e 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "early-ifcvt" +#include "MachineTraceMetrics.h" #include "llvm/Function.h" #include "llvm/ADT/BitVector.h" #include 
"llvm/ADT/PostOrderIterator.h" @@ -515,6 +516,8 @@ class EarlyIfConverter : public MachineFunctionPass { MachineRegisterInfo *MRI; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; + MachineTraceMetrics *Traces; + MachineTraceMetrics::Ensemble *MinInstr; SSAIfConv IfConv; public: @@ -527,6 +530,8 @@ private: bool tryConvertIf(MachineBasicBlock*); void updateDomTree(ArrayRef<MachineBasicBlock*> Removed); void updateLoops(ArrayRef<MachineBasicBlock*> Removed); + void invalidateTraces(); + bool shouldConvertIf(); }; } // end anonymous namespace @@ -537,6 +542,7 @@ INITIALIZE_PASS_BEGIN(EarlyIfConverter, "early-ifcvt", "Early If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) INITIALIZE_PASS_END(EarlyIfConverter, "early-ifcvt", "Early If Converter", false, false) @@ -546,6 +552,8 @@ void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineTraceMetrics>(); + AU.addPreserved<MachineTraceMetrics>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -576,12 +584,31 @@ void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) { Loops->removeBlock(Removed[i]); } +/// Invalidate MachineTraceMetrics before if-conversion. +void EarlyIfConverter::invalidateTraces() { + Traces->invalidate(IfConv.Head); + Traces->invalidate(IfConv.Tail); + Traces->invalidate(IfConv.TBB); + Traces->invalidate(IfConv.FBB); +} + +/// Apply cost model and heuristics to the if-conversion in IfConv. +/// Return true if the conversion is a good idea. 
+/// +bool EarlyIfConverter::shouldConvertIf() { + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + DEBUG(dbgs() << MinInstr->getTrace(IfConv.Head)); + return true; +} + /// Attempt repeated if-conversion on MBB, return true if successful. /// bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { bool Changed = false; - while (IfConv.canConvertIf(MBB)) { + while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { // If-convert MBB and update analyses. + invalidateTraces(); SmallVector<MachineBasicBlock*, 4> RemovedBlocks; IfConv.convertIf(RemovedBlocks); Changed = true; @@ -600,6 +627,8 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); DomTree = &getAnalysis<MachineDominatorTree>(); Loops = getAnalysisIfAvailable<MachineLoopInfo>(); + Traces = &getAnalysis<MachineTraceMetrics>(); + MinInstr = 0; bool Changed = false; IfConv.runOnMachineFunction(MF); diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp new file mode 100644 index 00000000000..b30210109c7 --- /dev/null +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -0,0 +1,477 @@ +//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "early-ifcvt" // NOTE(review): early-ifcvt's tag; confirm.
+#include "MachineTraceMetrics.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PostOrderIterator.h"
+
+using namespace llvm;
+
+char MachineTraceMetrics::ID = 0; // Exported through MachineTraceMetricsID.
+char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
+
+INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
+                  "machine-trace-metrics", "Machine Trace Metrics", false, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineTraceMetrics,
+                  "machine-trace-metrics", "Machine Trace Metrics", false, true)
+
+MachineTraceMetrics::MachineTraceMetrics()
+  : MachineFunctionPass(ID), TII(0), TRI(0), MRI(0), Loops(0) {
+  // Every strategy slot starts out null; no Ensemble is allocated here.
+  std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0);
+}
+
+void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll(); // runOnMachineFunction() reports no change.
+  AU.addRequired<MachineBranchProbabilityInfo>();
+  AU.addRequired<MachineLoopInfo>(); // Fetched in runOnMachineFunction().
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &MF) {
+  TII = MF.getTarget().getInstrInfo();
+  TRI = MF.getTarget().getRegisterInfo();
+  MRI = &MF.getRegInfo();
+  Loops = &getAnalysis<MachineLoopInfo>();
+  unsigned NumBlocks = MF.getNumBlockIDs();
+  BlockInfo.resize(NumBlocks); // One FixedBlockInfo slot per block number.
+  return false; // Only caches are set up; MF itself is not modified.
+}
+
+void MachineTraceMetrics::releaseMemory() {
+  BlockInfo.clear();
+  for (unsigned i = 0; i != TS_NumStrategies; ++i) {
+    delete Ensembles[i]; // Deleting a null slot is harmless.
+    Ensembles[i] = 0;    // Leave the slot empty after deletion.
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//                          Fixed block information
+//===----------------------------------------------------------------------===//
+//
+// The number of instructions in a basic block and the CPU resources used by
+// those instructions don't depend on any given trace strategy.
+
+/// Is MI an instruction that should be considered free because it will likely
+/// be eliminated by later passes? Free instructions are not counted below.
+static bool isFree(const MachineInstr *MI) {
+  switch(MI->getOpcode()) {
+  default: return false;
+  case TargetOpcode::PHI:
+  case TargetOpcode::PROLOG_LABEL:
+  case TargetOpcode::EH_LABEL:
+  case TargetOpcode::GC_LABEL:
+  case TargetOpcode::KILL:
+  case TargetOpcode::EXTRACT_SUBREG:
+  case TargetOpcode::INSERT_SUBREG:
+  case TargetOpcode::IMPLICIT_DEF:
+  case TargetOpcode::SUBREG_TO_REG:
+  case TargetOpcode::COPY_TO_REGCLASS:
+  case TargetOpcode::DBG_VALUE:
+  case TargetOpcode::REG_SEQUENCE:
+  case TargetOpcode::COPY:
+    return true;
+  }
+}
+
+/// Return the fixed resource usage of MBB, computing and caching it if needed.
+const MachineTraceMetrics::FixedBlockInfo*
+MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
+  assert(MBB && "No basic block");
+  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
+  if (FBI->hasResources()) // Cached entry is still valid; reuse it.
+    return FBI;
+
+  // Compute resource usage in the block: count non-free instructions and
+  // note calls. FIXME: Compute per-functional unit counts.
+  FBI->HasCalls = false;
+  unsigned InstrCount = 0;
+  for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+       I != E; ++I) {
+    const MachineInstr *MI = I;
+    if (isFree(MI))
+      continue;
+    ++InstrCount;
+    if (MI->isCall())
+      FBI->HasCalls = true;
+  }
+  FBI->InstrCount = InstrCount; // Also marks the entry as computed.
+  return FBI;
+}
+
+//===----------------------------------------------------------------------===//
+//                          Ensemble utility functions
+//===----------------------------------------------------------------------===//
+
+MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
+  : CT(*ct) {
+  BlockInfo.resize(CT.BlockInfo.size()); // Same size as the fixed info array.
+}
+
+// Virtual destructor serves as an anchor.
+MachineTraceMetrics::Ensemble::~Ensemble() {}
+
+MachineLoop*
+MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) {
+  return CT.Loops->getLoopFor(MBB); // Forwarded to the pass's MachineLoopInfo.
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace above MBB (InstrDepth).
+void MachineTraceMetrics::Ensemble::
+computeDepthResources(const MachineBasicBlock *MBB) {
+  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+
+  // Compute resources from trace above. The top block has nothing above it.
+  if (!TBI->Pred) {
+    TBI->InstrDepth = 0;
+    return;
+  }
+
+  // Compute from the block above. A post-order traversal ensures the
+  // predecessor is always computed first.
+  TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];
+  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
+  const FixedBlockInfo *PredFBI = CT.getResources(TBI->Pred);
+  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace below MBB (InstrHeight).
+void MachineTraceMetrics::Ensemble::
+computeHeightResources(const MachineBasicBlock *MBB) {
+  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+
+  // Compute resources for the current block; the height includes MBB itself.
+  TBI->InstrHeight = CT.getResources(MBB)->InstrCount;
+
+  // The trace tail is done.
+  if (!TBI->Succ)
+    return;
+
+  // Compute from the block below. A post-order traversal ensures the
+  // successor is always computed first.
+  TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];
+  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
+  TBI->InstrHeight += SuccTBI->InstrHeight;
+}
+
+// Check if depth resources for MBB are valid and return the TBI.
+// Return NULL if they have not been computed or have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getDepthResources(const MachineBasicBlock *MBB) const {
+  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+  return TBI->hasValidDepth() ? TBI : 0;
+}
+
+// Check if height resources for MBB are valid and return the TBI.
+// Return NULL if they have not been computed or have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getHeightResources(const MachineBasicBlock *MBB) const {
+  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+  return TBI->hasValidHeight() ? TBI : 0;
+}
+
+//===----------------------------------------------------------------------===//
+//                         Trace Selection Strategies
+//===----------------------------------------------------------------------===//
+//
+// A trace selection strategy is implemented as a sub-class of Ensemble. The
+// trace through a block B is computed by two DFS traversals of the CFG
+// starting from B. One upwards, and one downwards. During the upwards DFS,
+// pickTracePred() is called on the post-ordered blocks. During the downwards
+// DFS, pickTraceSucc() is called in a post-order.
+//
+
+// MinInstrCountEnsemble - Pick the trace that executes the least number of
+// instructions.
+namespace {
+class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
+  const char *getName() { return "MinInstr"; } // Name printed in trace dumps.
+  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*);
+  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*);
+
+public:
+  MinInstrCountEnsemble(MachineTraceMetrics *ct)
+    : MachineTraceMetrics::Ensemble(ct) {}
+};
+}
+
+// Select the preferred trace predecessor for MBB; NULL starts the trace at MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
+  if (MBB->pred_empty())
+    return 0;
+  MachineLoop *CurLoop = getLoopFor(MBB);
+  // Don't leave loops, and never follow back-edges: a loop header starts its
+  // own trace.
+  if (CurLoop && MBB == CurLoop->getHeader())
+    return 0;
+  unsigned CurCount = CT.getResources(MBB)->InstrCount;
+  const MachineBasicBlock *Best = 0;
+  unsigned BestDepth = 0; // Only meaningful once Best is non-null.
+  for (MachineBasicBlock::const_pred_iterator
+       I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+    const MachineBasicBlock *Pred = *I;
+    const MachineTraceMetrics::TraceBlockInfo *PredTBI =
+      getDepthResources(Pred);
+    // Ignore invalidated predecessors. This never happens on the first scan,
+    // but if we rejected this predecessor earlier, it won't be revalidated.
+    if (!PredTBI)
+      continue;
+    // Don't consider predecessors in other loops.
+    if (getLoopFor(Pred) != CurLoop)
+      continue;
+    // Pick the predecessor that would give this block the smallest InstrDepth.
+    unsigned Depth = PredTBI->InstrDepth + CurCount; // CurCount is loop-invariant.
+    if (!Best || Depth < BestDepth) // Ties keep the earlier predecessor.
+      Best = Pred, BestDepth = Depth;
+  }
+  return Best;
+}
+
+// Select the preferred successor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
+  if (MBB->succ_empty()) // No successors: MBB is the last block of its trace.
+    return 0;
+  MachineLoop *CurLoop = getLoopFor(MBB);
+  const MachineBasicBlock *Best = 0;
+  unsigned BestHeight = 0; // Only meaningful once Best is non-null.
+  for (MachineBasicBlock::const_succ_iterator
+       I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+    const MachineBasicBlock *Succ = *I;
+    const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
+      getHeightResources(Succ);
+    // Ignore successors whose height resources are not currently valid.
+    if (!SuccTBI)
+      continue;
+    // Don't consider back-edges.
+    if (CurLoop && Succ == CurLoop->getHeader())
+      continue;
+    // Don't consider successors in other loops.
+    if (getLoopFor(Succ) != CurLoop)
+      continue;
+    // Pick the successor that would give this block the smallest InstrHeight.
+    unsigned Height = SuccTBI->InstrHeight;
+    if (!Best || Height < BestHeight) // Ties keep the earlier successor.
+      Best = Succ, BestHeight = Height;
+  }
+  return Best;
+}
+
+// Get the Ensemble for the requested strategy, creating it on first use.
+MachineTraceMetrics::Ensemble *
+MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
+  assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
+  Ensemble *&E = Ensembles[strategy]; // Reference, so the new object is cached.
+  if (E)
+    return E;
+
+  // Allocate new Ensemble on demand.
+  switch (strategy) {
+  case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
+  default: llvm_unreachable("Invalid trace strategy enum");
+  }
+}
+
+void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
+  BlockInfo[MBB->getNumber()].invalidate(); // Drop the fixed per-block data.
+  for (unsigned i = 0; i != TS_NumStrategies; ++i)
+    if (Ensembles[i]) // Skip strategies that were never requested.
+      Ensembles[i]->invalidate(MBB);
+}
+
+//===----------------------------------------------------------------------===//
+//                               Trace building
+//===----------------------------------------------------------------------===//
+//
+// Traces are built by two CFG traversals.
To avoid recomputing too much, use a
+// set abstraction that confines the search to the current loop, and doesn't
+// revisit blocks.
+
+namespace {
+struct LoopBounds {
+  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
+  const MachineLoopInfo *Loops;
+  const MachineLoop *CurLoop;
+  bool Downward; // Selects height (true) or depth (false) as the visited mark.
+  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
+             const MachineLoopInfo *loops, const MachineLoop *curloop)
+    : Blocks(blocks), Loops(loops), CurLoop(curloop), Downward(false) {}
+};
+}
+
+// Specialize po_iterator_storage in order to prune the post-order traversal so
+// it is limited to the current loop and doesn't traverse the loop back edges.
+namespace llvm {
+template<>
+class po_iterator_storage<LoopBounds, true> {
+  LoopBounds &LB;
+public:
+  po_iterator_storage(LoopBounds &lb) : LB(lb) {}
+  void finishPostorder(const MachineBasicBlock*) {}
+
+  bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) {
+    // Skip already visited To blocks. A valid height/depth is the visited mark.
+    MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
+    if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
+      return false;
+    // Don't follow CurLoop backedges.
+    if (LB.CurLoop && (LB.Downward ? To : From) == LB.CurLoop->getHeader())
+      return false;
+    // Don't leave CurLoop.
+    if (LB.Loops->getLoopFor(To) != LB.CurLoop)
+      return false;
+    // This is a new block. The PO traversal will compute height/depth
+    // resources, causing us to reject new edges to To. This only works because
+    // we reject back-edges, so the CFG is cycle-free.
+    return true;
+  }
+};
+}
+
+/// Compute the trace through MBB: pick Pred/Succ and depth/height resources.
+void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
+               << MBB->getNumber() << '\n');
+  // Set up loop bounds for the backwards post-order traversal.
+  LoopBounds Bounds(BlockInfo, CT.Loops, getLoopFor(MBB));
+
+  // Run an upwards post-order search for the trace start.
+  Bounds.Downward = false;
+  typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO;
+  for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds);
+       I != E; ++I) {
+    DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
+    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+    // All the predecessors have been visited, pick the preferred one.
+    TBI.Pred = pickTracePred(*I);
+    DEBUG({
+      if (TBI.Pred)
+        dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
+      else
+        dbgs() << "null\n";
+    });
+    // The trace leading to I is now known, compute the depth resources.
+    computeDepthResources(*I);
+  }
+
+  // Run a downwards post-order search for the trace end.
+  Bounds.Downward = true;
+  typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO;
+  for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds);
+       I != E; ++I) {
+    DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
+    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+    // All the successors have been visited, pick the preferred one.
+    TBI.Succ = pickTraceSucc(*I);
+    DEBUG({
+      if (TBI.Succ) // Test the pointer that is dereferenced below.
+        dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
+      else
+        dbgs() << "null\n";
+    });
+    // The trace leaving I is now known, compute the height resources.
+    computeHeightResources(*I);
+  }
+}
+
+/// Invalidate traces through BadMBB.
+void
+MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
+  SmallVector<const MachineBasicBlock*, 16> WorkList;
+  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
+
+  // Invalidate height resources of blocks above BadMBB.
+  if (BadTBI.hasValidHeight()) {
+    BadTBI.invalidateHeight();
+    WorkList.push_back(BadMBB);
+    do {
+      const MachineBasicBlock *MBB = WorkList.pop_back_val();
+      DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+            << " height.\n");
+      // Find any MBB predecessors that have MBB as their preferred successor.
+      // They are the only ones that need to be invalidated.
+      for (MachineBasicBlock::const_pred_iterator
+           I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+        TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+        if (TBI.hasValidHeight() && TBI.Succ == MBB) {
+          TBI.invalidateHeight();
+          WorkList.push_back(*I);
+        }
+      }
+    } while (!WorkList.empty());
+  }
+
+  // Invalidate depth resources of blocks below BadMBB.
+  if (BadTBI.hasValidDepth()) {
+    BadTBI.invalidateDepth();
+    WorkList.push_back(BadMBB);
+    do {
+      const MachineBasicBlock *MBB = WorkList.pop_back_val();
+      DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+            << " depth.\n");
+      // Find any MBB successors that have MBB as their preferred predecessor.
+      // They are the only ones that need to be invalidated.
+      for (MachineBasicBlock::const_succ_iterator
+           I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+        TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+        if (TBI.hasValidDepth() && TBI.Pred == MBB) {
+          TBI.invalidateDepth();
+          WorkList.push_back(*I);
+        }
+      }
+    } while (!WorkList.empty());
+  }
+}
+
+
+MachineTraceMetrics::Trace
+MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
+  // FIXME: Check cache tags, recompute as needed.
+  computeTrace(MBB);
+  return Trace(*this, BlockInfo[MBB->getNumber()]);
+}
+
+void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
+  OS << TE.getName() << " trace:";
+  if (TBI.hasValidHeight() && TBI.hasValidDepth())
+    OS << ' ' << getInstrCount() << " instrs.";
+
+  const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
+  OS << "\n *";
+  while (Block->hasValidDepth() && Block->Pred) {
+    unsigned Num = Block->Pred->getNumber();
+    OS << " <- BB#" << Num;
+    Block = &TE.BlockInfo[Num];
+  }
+
+  Block = &TBI;
+  OS << "\n *";
+  while (Block->hasValidHeight() && Block->Succ) {
+    unsigned Num = Block->Succ->getNumber();
+    OS << " -> BB#" << Num;
+    Block = &TE.BlockInfo[Num];
+  }
+  OS << '\n';
+}
diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h
new file mode 100644
index 00000000000..086d7eaebff
--- /dev/null
+++ b/lib/CodeGen/MachineTraceMetrics.h
@@ -0,0 +1,218 @@
+//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the MachineTraceMetrics analysis pass
+// that estimates CPU resource usage and critical data dependency paths through
+// preferred traces. This is useful for super-scalar CPUs where execution speed
+// can be limited both by data dependencies and by limited execution resources.
+//
+// Out-of-order CPUs will often be executing instructions from multiple basic
+// blocks at the same time. This makes it difficult to estimate the resource
+// usage accurately in a single basic block. Resources can be estimated better
+// by looking at a trace through the current basic block.
+//
+// For every block, the MachineTraceMetrics pass will pick a preferred trace
+// that passes through the block.
The trace is chosen based on loop structure, +// branch probabilities, and resource usage. The intention is to pick likely +// traces that would be the most affected by code transformations. +// +// It is expensive to compute a full arbitrary trace for every block, so to +// save some computations, traces are chosen to be convergent. This means that +// if the traces through basic blocks A and B ever cross when moving away from +// A and B, they never diverge again. This applies in both directions - If the +// traces meet above A and B, they won't diverge when going further back. +// +// Traces tend to align with loops. The trace through a block in an inner loop +// will begin at the loop entry block and end at a back edge. If there are +// nested loops, the trace may begin and end at those instead. +// +// For each trace, we compute the critical path length, which is the number of +// cycles required to execute the trace when execution is limited by data +// dependencies only. We also compute the resource height, which is the number +// of cycles required to execute all instructions in the trace when ignoring +// data dependencies. +// +// Every instruction in the current block has a slack - the number of cycles +// execution of the instruction can be delayed without extending the critical +// path. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class MachineBasicBlock;
+class MachineRegisterInfo;
+class MachineLoopInfo;
+class MachineLoop;
+class raw_ostream;
+
+class MachineTraceMetrics : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const MachineRegisterInfo *MRI;
+  const MachineLoopInfo *Loops;
+
+public:
+  class Ensemble;
+  class Trace;
+  static char ID;
+  MachineTraceMetrics();
+  void getAnalysisUsage(AnalysisUsage&) const;
+  bool runOnMachineFunction(MachineFunction&);
+  void releaseMemory();
+
+  friend class Ensemble;
+  friend class Trace;
+
+  /// Per-basic block information that doesn't depend on the trace through the
+  /// block.
+  struct FixedBlockInfo {
+    /// The number of non-trivial instructions in the block, or ~0u if unknown.
+    /// Doesn't count PHI and COPY instructions that are likely to be removed.
+    unsigned InstrCount;
+
+    /// True when the block contains calls.
+    bool HasCalls;
+
+    FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {}
+
+    /// Returns true when resource information for this block has been computed.
+    bool hasResources() const { return InstrCount != ~0u; }
+
+    /// Invalidate resource information.
+    void invalidate() { InstrCount = ~0u; }
+  };
+
+  /// Get the fixed resource information about MBB. Compute it on demand.
+  const FixedBlockInfo *getResources(const MachineBasicBlock*);
+
+  /// Per-basic block information that relates to a specific trace through the
+  /// block. Convergent traces means that only one of these is required per
+  /// block in a trace ensemble.
+  struct TraceBlockInfo {
+    /// Trace predecessor, or NULL for the first block in the trace.
+    const MachineBasicBlock *Pred;
+
+    /// Trace successor, or NULL for the last block in the trace.
+    const MachineBasicBlock *Succ;
+
+    /// Accumulated number of instructions in the trace above this block.
+    /// Does not include instructions in this block. ~0u means not valid.
+    unsigned InstrDepth;
+
+    /// Accumulated number of instructions in the trace below this block.
+    /// Includes instructions in this block. ~0u means not valid.
+    unsigned InstrHeight;
+
+    TraceBlockInfo() : Pred(0), Succ(0), InstrDepth(~0u), InstrHeight(~0u) {}
+
+    /// Returns true if the depth resources have been computed from the trace
+    /// above this block.
+    bool hasValidDepth() const { return InstrDepth != ~0u; }
+
+    /// Returns true if the height resources have been computed from the trace
+    /// below this block.
+    bool hasValidHeight() const { return InstrHeight != ~0u; }
+
+    /// Invalidate depth resources when some block above this one has changed.
+    void invalidateDepth() { InstrDepth = ~0u; }
+
+    /// Invalidate height resources when a block below this one has changed.
+    void invalidateHeight() { InstrHeight = ~0u; }
+  };
+
+  /// A trace represents a plausible sequence of executed basic blocks that
+  /// passes through the current basic block. The Trace class serves as a
+  /// handle to internal cached data structures.
+  class Trace {
+    Ensemble &TE;
+    TraceBlockInfo &TBI;
+
+  public:
+    explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
+    void print(raw_ostream&) const;
+
+    /// Total number of instructions in the trace: InstrDepth + InstrHeight.
+    unsigned getInstrCount() const {
+      return TBI.InstrDepth + TBI.InstrHeight;
+    }
+  };
+
+  /// A trace ensemble is a collection of traces selected using the same
+  /// strategy, for example 'minimum resource height'. There is one trace for
+  /// every block in the function.
+  class Ensemble {
+    SmallVector<TraceBlockInfo, 4> BlockInfo; // Indexed by block number.
+    friend class Trace;
+
+    void computeTrace(const MachineBasicBlock*);
+    void computeDepthResources(const MachineBasicBlock*);
+    void computeHeightResources(const MachineBasicBlock*);
+
+  protected:
+    MachineTraceMetrics &CT; // The analysis that created this ensemble.
+    virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
+    virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
+    explicit Ensemble(MachineTraceMetrics*);
+    MachineLoop *getLoopFor(const MachineBasicBlock*);
+    const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
+    const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
+
+  public:
+    virtual ~Ensemble();
+    virtual const char *getName() =0;
+    void invalidate(const MachineBasicBlock *MBB);
+
+    /// Get the trace that passes through MBB.
+    /// The trace is computed on demand.
+    Trace getTrace(const MachineBasicBlock *MBB);
+  };
+
+  /// Strategies for selecting traces.
+  enum Strategy {
+    /// Select the trace through a block that has the fewest instructions.
+    TS_MinInstrCount,
+
+    TS_NumStrategies
+  };
+
+  /// Get the trace ensemble representing the given trace selection strategy.
+  /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
+  /// and valid until the analysis deletes it in releaseMemory().
+  Ensemble *getEnsemble(Strategy);
+
+  /// Invalidate cached information about MBB. This must be called *before* MBB
+  /// is erased, or the CFG is otherwise changed.
+  void invalidate(const MachineBasicBlock *MBB);
+
+private:
+  // One entry per basic block, indexed by block number.
+  SmallVector<FixedBlockInfo, 4> BlockInfo;
+
+  // One ensemble per strategy. Null until requested through getEnsemble().
+  Ensemble* Ensembles[TS_NumStrategies];
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+                               const MachineTraceMetrics::Trace &Tr) {
+  Tr.print(OS);
+  return OS;
+}
+
+} // end namespace llvm
+
+#endif