[MachineSink+PGO] Teach MachineSink to use BlockFrequencyInfo

Machine Sink uses loop depth information to select between successors BBs to
sink machine instructions into, where BBs within smaller loop depths are
preferable.  This patch adds support for choosing between successors by using
profile information from BlockFrequencyInfo instead, whenever the information
is available.

Tested it under SPEC2006 train (average of 30 runs for each program); ~1.5%
execution speedup in average on x86-64 darwin.

<rdar://problem/18021659>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218472 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes
2014-09-25 23:14:26 +00:00
parent 5c1c055cb7
commit f4230250a1
2 changed files with 68 additions and 6 deletions

View File

@ -21,6 +21,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
@ -41,6 +42,12 @@ SplitEdges("machine-sink-split",
cl::desc("Split critical edges during machine sinking"),
cl::init(true), cl::Hidden);
static cl::opt<bool>
UseBlockFreqInfo("machine-sink-bfi",
cl::desc("Use block frequency info to find successors to sink"),
cl::init(true), cl::Hidden);
STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
@ -53,6 +60,7 @@ namespace {
MachineDominatorTree *DT; // Machine dominator tree
MachinePostDominatorTree *PDT; // Machine post dominator tree
MachineLoopInfo *LI;
const MachineBlockFrequencyInfo *MBFI;
AliasAnalysis *AA;
// Remember which edges have been considered for breaking.
@ -81,6 +89,8 @@ namespace {
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachinePostDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
if (UseBlockFreqInfo)
AU.addRequired<MachineBlockFrequencyInfo>();
}
void releaseMemory() override {
@ -247,6 +257,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
DT = &getAnalysis<MachineDominatorTree>();
PDT = &getAnalysis<MachinePostDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
AA = &getAnalysis<AliasAnalysis>();
bool EverMadeChange = false;
@ -566,14 +577,20 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
}
// Otherwise, we should look at all the successors and decide which one
// we should sink to.
// We give successors with smaller loop depth higher priority.
SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end());
// Sort Successors according to their loop depth.
// we should sink to. If we have reliable block frequency information
// (frequency != 0) available, give successors with smaller frequencies
// higher priority, otherwise prioritize smaller loop depths.
SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(),
MBB->succ_end());
// Sort Successors according to their loop depth or block frequency info.
std::stable_sort(
Succs.begin(), Succs.end(),
[this](const MachineBasicBlock *LHS, const MachineBasicBlock *RHS) {
return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS);
[this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
return HasBlockFreq ? LHSFreq < RHSFreq
: LI->getLoopDepth(L) < LI->getLoopDepth(R);
});
for (SmallVectorImpl<MachineBasicBlock *>::iterator SI = Succs.begin(),
E = Succs.end(); SI != E; ++SI) {