blockfreq: Only one mass distribution per node

Remove the concepts of "forward" and "general" mass distributions, which
was wrong.  The split might have made sense in an early version of the
algorithm, but it's definitely wrong now.

<rdar://problem/14292693>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207195 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Duncan P. N. Exon Smith
2014-04-25 04:38:43 +00:00
parent 9c9891431b
commit 39087bfbf0
3 changed files with 48 additions and 94 deletions

View File

@@ -1024,17 +1024,14 @@ public:
/// This class collates the successor edge weights for later processing. /// This class collates the successor edge weights for later processing.
/// ///
/// \a DidOverflow indicates whether \a Total did overflow while adding to /// \a DidOverflow indicates whether \a Total did overflow while adding to
/// the distribution. It should never overflow twice. There's no flag for /// the distribution. It should never overflow twice.
/// whether \a ForwardTotal overflows, since when \a Total exceeds 32-bits
/// they both get re-computed during \a normalize().
struct Distribution { struct Distribution {
typedef SmallVector<Weight, 4> WeightList; typedef SmallVector<Weight, 4> WeightList;
WeightList Weights; ///< Individual successor weights. WeightList Weights; ///< Individual successor weights.
uint64_t Total; ///< Sum of all weights. uint64_t Total; ///< Sum of all weights.
bool DidOverflow; ///< Whether \a Total did overflow. bool DidOverflow; ///< Whether \a Total did overflow.
uint32_t ForwardTotal; ///< Total excluding backedges.
Distribution() : Total(0), DidOverflow(false), ForwardTotal(0) {} Distribution() : Total(0), DidOverflow(false) {}
void addLocal(const BlockNode &Node, uint64_t Amount) { void addLocal(const BlockNode &Node, uint64_t Amount) {
add(Node, Amount, Weight::Local); add(Node, Amount, Weight::Local);
} }
@@ -1079,9 +1076,8 @@ public:
/// \brief Add an edge to the distribution. /// \brief Add an edge to the distribution.
/// ///
/// Adds an edge to Succ to Dist. If \c LoopHead.isValid(), then whether the /// Adds an edge to Succ to Dist. If \c LoopHead.isValid(), then whether the
/// edge is forward/exit/backedge is in the context of LoopHead. Otherwise, /// edge is local/exit/backedge is in the context of LoopHead. Otherwise,
/// every edge should be a forward edge (since all the loops are packaged /// every edge should be a local edge (since all the loops are packaged up).
/// up).
void addToDist(Distribution &Dist, const LoopData *OuterLoop, void addToDist(Distribution &Dist, const LoopData *OuterLoop,
const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight); const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight);
@@ -1119,19 +1115,6 @@ public:
/// backedges and exits are stored in its entry in Loops. /// backedges and exits are stored in its entry in Loops.
/// ///
/// Mass is distributed in parallel from two copies of the source mass. /// Mass is distributed in parallel from two copies of the source mass.
///
/// The first mass (forward) represents the distribution of mass through the
/// local DAG. This distribution should lose mass at loop exits and ignore
/// backedges.
///
/// The second mass (general) represents the behavior of the loop in the
/// global context. In a given distribution from the head, how much mass
/// exits, and to where? How much mass returns to the loop head?
///
/// The forward mass should be split up between local successors and exits,
/// but only actually distributed to the local successors. The general mass
/// should be split up between all three types of successors, but distributed
/// only to exits and backedges.
void distributeMass(const BlockNode &Source, LoopData *OuterLoop, void distributeMass(const BlockNode &Source, LoopData *OuterLoop,
Distribution &Dist); Distribution &Dist);
@@ -1270,23 +1253,17 @@ template <> inline std::string getBlockName(const BasicBlock *BB) {
/// in \a LoopData::Exits. Otherwise, fetch it from /// in \a LoopData::Exits. Otherwise, fetch it from
/// BranchProbabilityInfo. /// BranchProbabilityInfo.
/// ///
/// - Each successor is categorized as \a Weight::Local, a normal /// - Each successor is categorized as \a Weight::Local, a local edge
/// forward edge within the current loop, \a Weight::Backedge, a /// within the current loop, \a Weight::Backedge, a backedge to the
/// backedge to the loop header, or \a Weight::Exit, any successor /// loop header, or \a Weight::Exit, any successor outside the loop.
/// outside the loop. The weight, the successor, and its category /// The weight, the successor, and its category are stored in \a
/// are stored in \a Distribution. There can be multiple edges to /// Distribution. There can be multiple edges to each successor.
/// each successor.
/// ///
/// - Normalize the distribution: scale weights down so that their sum /// - Normalize the distribution: scale weights down so that their sum
/// is 32-bits, and coalesce multiple edges to the same node. /// is 32-bits, and coalesce multiple edges to the same node.
/// ///
/// - Distribute the mass accordingly, dithering to minimize mass loss, /// - Distribute the mass accordingly, dithering to minimize mass loss,
/// as described in \a distributeMass(). Mass is distributed in /// as described in \a distributeMass().
/// parallel in two ways: forward, and general. Local successors
/// take their mass from the forward mass, while exit and backedge
/// successors take their mass from the general mass. Additionally,
/// exit edges use up (ignored) mass from the forward mass, and local
/// edges use up (ignored) mass from the general distribution.
/// ///
/// Finally, calculate the loop scale from the accumulated backedge mass. /// Finally, calculate the loop scale from the accumulated backedge mass.
/// ///

View File

@@ -389,31 +389,12 @@ typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
/// 2. Calculate the portion's mass as \a RemMass times P. /// 2. Calculate the portion's mass as \a RemMass times P.
/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting /// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
/// the current portion's weight and mass. /// the current portion's weight and mass.
///
/// Mass is distributed in two ways: full distribution and forward
/// distribution. The latter ignores backedges, and uses the parallel fields
/// \a RemForwardWeight and \a RemForwardMass.
struct DitheringDistributer { struct DitheringDistributer {
uint32_t RemWeight; uint32_t RemWeight;
uint32_t RemForwardWeight;
BlockMass RemMass; BlockMass RemMass;
BlockMass RemForwardMass;
DitheringDistributer(Distribution &Dist, const BlockMass &Mass); DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
BlockMass takeLocalMass(uint32_t Weight) {
(void)takeMass(Weight);
return takeForwardMass(Weight);
}
BlockMass takeExitMass(uint32_t Weight) {
(void)takeForwardMass(Weight);
return takeMass(Weight);
}
BlockMass takeBackedgeMass(uint32_t Weight) { return takeMass(Weight); }
private:
BlockMass takeForwardMass(uint32_t Weight);
BlockMass takeMass(uint32_t Weight); BlockMass takeMass(uint32_t Weight);
}; };
} }
@@ -422,22 +403,9 @@ DitheringDistributer::DitheringDistributer(Distribution &Dist,
const BlockMass &Mass) { const BlockMass &Mass) {
Dist.normalize(); Dist.normalize();
RemWeight = Dist.Total; RemWeight = Dist.Total;
RemForwardWeight = Dist.ForwardTotal;
RemMass = Mass; RemMass = Mass;
RemForwardMass = Dist.ForwardTotal ? Mass : BlockMass();
} }
BlockMass DitheringDistributer::takeForwardMass(uint32_t Weight) {
// Compute the amount of mass to take.
assert(Weight && "invalid weight");
assert(Weight <= RemForwardWeight);
BlockMass Mass = RemForwardMass * BranchProbability(Weight, RemForwardWeight);
// Decrement totals (dither).
RemForwardWeight -= Weight;
RemForwardMass -= Mass;
return Mass;
}
BlockMass DitheringDistributer::takeMass(uint32_t Weight) { BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
assert(Weight && "invalid weight"); assert(Weight && "invalid weight");
assert(Weight <= RemWeight); assert(Weight <= RemWeight);
@@ -468,13 +436,6 @@ void Distribution::add(const BlockNode &Node, uint64_t Amount,
W.Amount = Amount; W.Amount = Amount;
W.Type = Type; W.Type = Type;
Weights.push_back(W); Weights.push_back(W);
if (Type == Weight::Backedge)
return;
// Update forward total. Don't worry about overflow here, since then Total
// will exceed 32-bits and they'll both be recomputed in normalize().
ForwardTotal += Amount;
} }
static void combineWeight(Weight &W, const Weight &OtherW) { static void combineWeight(Weight &W, const Weight &OtherW) {
@@ -554,7 +515,6 @@ void Distribution::normalize() {
// Early exit when combined into a single successor. // Early exit when combined into a single successor.
if (Weights.size() == 1) { if (Weights.size() == 1) {
Total = 1; Total = 1;
ForwardTotal = Weights.front().Type != Weight::Backedge;
Weights.front().Amount = 1; Weights.front().Amount = 1;
return; return;
} }
@@ -574,9 +534,8 @@ void Distribution::normalize() {
return; return;
// Recompute the total through accumulation (rather than shifting it) so that // Recompute the total through accumulation (rather than shifting it) so that
// it's accurate after shifting. ForwardTotal is dirty here anyway. // it's accurate after shifting.
Total = 0; Total = 0;
ForwardTotal = 0;
// Sum the weights to each node and shift right if necessary. // Sum the weights to each node and shift right if necessary.
for (Weight &W : Weights) { for (Weight &W : Weights) {
@@ -588,11 +547,6 @@ void Distribution::normalize() {
// Update the total. // Update the total.
Total += W.Amount; Total += W.Amount;
if (W.Type == Weight::Backedge)
continue;
// Update the forward total.
ForwardTotal += W.Amount;
} }
assert(Total <= UINT32_MAX); assert(Total <= UINT32_MAX);
} }
@@ -732,8 +686,7 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
LoopData *OuterLoop, LoopData *OuterLoop,
Distribution &Dist) { Distribution &Dist) {
BlockMass Mass = getPackageMass(*this, Source); BlockMass Mass = getPackageMass(*this, Source);
DEBUG(dbgs() << " => mass: " << Mass DEBUG(dbgs() << " => mass: " << Mass << "\n");
<< " ( general | forward )\n");
// Distribute mass to successors as laid out in Dist. // Distribute mass to successors as laid out in Dist.
DitheringDistributer D(Dist, Mass); DitheringDistributer D(Dist, Mass);
@@ -741,8 +694,7 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
#ifndef NDEBUG #ifndef NDEBUG
auto debugAssign = [&](const BlockNode &T, const BlockMass &M, auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
const char *Desc) { const char *Desc) {
dbgs() << " => assign " << M << " (" << D.RemMass << "|" dbgs() << " => assign " << M << " (" << D.RemMass << ")";
<< D.RemForwardMass << ")";
if (Desc) if (Desc)
dbgs() << " [" << Desc << "]"; dbgs() << " [" << Desc << "]";
if (T.isValid()) if (T.isValid())
@@ -753,11 +705,11 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
#endif #endif
for (const Weight &W : Dist.Weights) { for (const Weight &W : Dist.Weights) {
// Check for a local edge (forward and non-exit). // Check for a local edge (non-backedge and non-exit).
BlockMass Taken = D.takeMass(W.Amount);
if (W.Type == Weight::Local) { if (W.Type == Weight::Local) {
BlockMass Local = D.takeLocalMass(W.Amount); getPackageMass(*this, W.TargetNode) += Taken;
getPackageMass(*this, W.TargetNode) += Local; DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
DEBUG(debugAssign(W.TargetNode, Local, nullptr));
continue; continue;
} }
@@ -766,17 +718,15 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
// Check for a backedge. // Check for a backedge.
if (W.Type == Weight::Backedge) { if (W.Type == Weight::Backedge) {
BlockMass Back = D.takeBackedgeMass(W.Amount); OuterLoop->BackedgeMass += Taken;
OuterLoop->BackedgeMass += Back; DEBUG(debugAssign(BlockNode(), Taken, "back"));
DEBUG(debugAssign(BlockNode(), Back, "back"));
continue; continue;
} }
// This must be an exit. // This must be an exit.
assert(W.Type == Weight::Exit); assert(W.Type == Weight::Exit);
BlockMass Exit = D.takeExitMass(W.Amount); OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Exit)); DEBUG(debugAssign(W.TargetNode, Taken, "exit"));
DEBUG(debugAssign(W.TargetNode, Exit, "exit"));
} }
} }

View File

@@ -0,0 +1,27 @@
; RUN: opt < %s -analyze -block-freq | FileCheck %s
define void @double_backedge(i1 %x) {
; CHECK-LABEL: Printing analysis {{.*}} for function 'double_backedge':
; CHECK-NEXT: block-frequency-info: double_backedge
entry:
; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
br label %loop
loop:
; CHECK-NEXT: loop: float = 10.0,
br i1 %x, label %exit, label %loop.1, !prof !0
loop.1:
; CHECK-NEXT: loop.1: float = 9.0,
br i1 %x, label %loop, label %loop.2, !prof !1
loop.2:
; CHECK-NEXT: loop.2: float = 5.0,
br label %loop
exit:
; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
ret void
}
!0 = metadata !{metadata !"branch_weights", i32 1, i32 9}
!1 = metadata !{metadata !"branch_weights", i32 4, i32 5}