mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-14 00:32:55 +00:00
Minimize precision loss when computing cyclic probabilities.
Allow block frequencies to exceed 32 bits by using the new BlockFrequency division function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185236 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6a636a813f
commit
97be1d608e
@ -85,31 +85,16 @@ class BlockFrequencyImpl {
|
||||
<< " --> " << Freqs[BB] << "\n");
|
||||
}
|
||||
|
||||
/// divBlockFreq - Divide BB block frequency by PROB. If Prob = 0 do nothing.
|
||||
///
|
||||
void divBlockFreq(BlockT *BB, BranchProbability Prob) {
|
||||
uint64_t N = Prob.getNumerator();
|
||||
assert(N && "Illegal division by zero!");
|
||||
uint64_t D = Prob.getDenominator();
|
||||
uint64_t Freq = (Freqs[BB].getFrequency() * D) / N;
|
||||
|
||||
// Should we assert it?
|
||||
if (Freq > UINT32_MAX)
|
||||
Freq = UINT32_MAX;
|
||||
|
||||
Freqs[BB] = BlockFrequency(Freq);
|
||||
DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") /= (" << Prob
|
||||
<< ") --> " << Freqs[BB] << "\n");
|
||||
}
|
||||
|
||||
// All blocks in postorder.
|
||||
std::vector<BlockT *> POT;
|
||||
|
||||
// Map Block -> Position in reverse-postorder list.
|
||||
DenseMap<BlockT *, unsigned> RPO;
|
||||
|
||||
// Cycle Probability for each bloch.
|
||||
DenseMap<BlockT *, uint32_t> CycleProb;
|
||||
// For each loop header, record the per-iteration probability of exiting the
|
||||
// loop. This is the reciprocal of the expected number of loop iterations.
|
||||
typedef DenseMap<BlockT*, BranchProbability> LoopExitProbMap;
|
||||
LoopExitProbMap LoopExitProb;
|
||||
|
||||
// (reverse-)postorder traversal iterators.
|
||||
typedef typename std::vector<BlockT *>::iterator pot_iterator;
|
||||
@ -203,10 +188,13 @@ class BlockFrequencyImpl {
|
||||
if (!isLoopHead)
|
||||
return;
|
||||
|
||||
assert(EntryFreq >= CycleProb[BB]);
|
||||
uint32_t CProb = CycleProb[BB];
|
||||
uint32_t Numerator = EntryFreq - CProb ? EntryFreq - CProb : 1;
|
||||
divBlockFreq(BB, BranchProbability(Numerator, EntryFreq));
|
||||
// This block is a loop header, so boost its frequency by the expected
|
||||
// number of loop iterations. The loop blocks will be revisited so they all
|
||||
// get this boost.
|
||||
typename LoopExitProbMap::const_iterator I = LoopExitProb.find(BB);
|
||||
assert(I != LoopExitProb.end() && "Loop header missing from table");
|
||||
Freqs[BB] /= I->second;
|
||||
DEBUG(dbgs() << "Loop header scaled to " << Freqs[BB] << ".\n");
|
||||
}
|
||||
|
||||
/// doLoop - Propagate block frequency down through the loop.
|
||||
@ -226,24 +214,50 @@ class BlockFrequencyImpl {
|
||||
}
|
||||
|
||||
// Compute loop's cyclic probability using backedges probabilities.
|
||||
BlockFrequency BackFreq;
|
||||
for (typename GT::ChildIteratorType
|
||||
PI = GraphTraits< Inverse<BlockT *> >::child_begin(Head),
|
||||
PE = GraphTraits< Inverse<BlockT *> >::child_end(Head);
|
||||
PI != PE; ++PI) {
|
||||
BlockT *Pred = *PI;
|
||||
assert(Pred);
|
||||
if (isBackedge(Pred, Head)) {
|
||||
uint64_t N = getEdgeFreq(Pred, Head).getFrequency();
|
||||
uint64_t D = getBlockFreq(Head).getFrequency();
|
||||
assert(N <= EntryFreq && "Backedge frequency must be <= EntryFreq!");
|
||||
uint64_t Res = (N * EntryFreq) / D;
|
||||
|
||||
assert(Res <= UINT32_MAX);
|
||||
CycleProb[Head] += (uint32_t) Res;
|
||||
DEBUG(dbgs() << " CycleProb[" << getBlockName(Head) << "] += " << Res
|
||||
<< " --> " << CycleProb[Head] << "\n");
|
||||
}
|
||||
if (isBackedge(Pred, Head))
|
||||
BackFreq += getEdgeFreq(Pred, Head);
|
||||
}
|
||||
|
||||
// The cyclic probability is freq(BackEdges) / freq(Head), where freq(Head)
|
||||
// only counts edges entering the loop, not the loop backedges.
|
||||
// The probability of leaving the loop on each iteration is:
|
||||
//
|
||||
// ExitProb = 1 - CyclicProb
|
||||
//
|
||||
// The Expected number of loop iterations is:
|
||||
//
|
||||
// Iterations = 1 / ExitProb
|
||||
//
|
||||
uint64_t D = std::max(getBlockFreq(Head).getFrequency(), 1ull);
|
||||
uint64_t N = std::max(BackFreq.getFrequency(), 1ull);
|
||||
if (N < D)
|
||||
N = D - N;
|
||||
else
|
||||
// We'd expect N < D, but rounding and saturation means that can't be
|
||||
// guaranteed.
|
||||
N = 1;
|
||||
|
||||
// Now ExitProb = N / D, make sure it fits in an i32/i32 fraction.
|
||||
assert(N <= D);
|
||||
if (D > UINT32_MAX) {
|
||||
unsigned Shift = 32 - countLeadingZeros(D);
|
||||
D >>= Shift;
|
||||
N >>= Shift;
|
||||
if (N == 0)
|
||||
N = 1;
|
||||
}
|
||||
BranchProbability LEP = BranchProbability(N, D);
|
||||
LoopExitProb.insert(std::make_pair(Head, LEP));
|
||||
DEBUG(dbgs() << "LoopExitProb[" << getBlockName(Head) << "] = " << LEP
|
||||
<< " from 1 - " << BackFreq << " / " << getBlockFreq(Head)
|
||||
<< ".\n");
|
||||
}
|
||||
|
||||
friend class BlockFrequencyInfo;
|
||||
@ -258,7 +272,7 @@ class BlockFrequencyImpl {
|
||||
// Clear everything.
|
||||
RPO.clear();
|
||||
POT.clear();
|
||||
CycleProb.clear();
|
||||
LoopExitProb.clear();
|
||||
Freqs.clear();
|
||||
|
||||
BlockT *EntryBlock = fn->begin();
|
||||
|
@ -90,3 +90,45 @@ exit:
|
||||
}
|
||||
|
||||
!1 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4}
|
||||
|
||||
; CHECK: Printing analysis {{.*}} for function 'nested_loops'
|
||||
; CHECK: entry = 1.0
|
||||
; This test doesn't seem to be assigning sensible frequencies to nested loops.
|
||||
define void @nested_loops(i32 %a) {
|
||||
entry:
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader:
|
||||
%x.024 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
|
||||
br label %for.cond4.preheader
|
||||
|
||||
for.cond4.preheader:
|
||||
%y.023 = phi i32 [ 0, %for.cond1.preheader ], [ %inc9, %for.inc8 ]
|
||||
%add = add i32 %y.023, %x.024
|
||||
br label %for.body6
|
||||
|
||||
for.body6:
|
||||
%z.022 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
|
||||
%add7 = add i32 %add, %z.022
|
||||
tail call void @g(i32 %add7) #2
|
||||
%inc = add i32 %z.022, 1
|
||||
%cmp5 = icmp ugt i32 %inc, %a
|
||||
br i1 %cmp5, label %for.inc8, label %for.body6, !prof !2
|
||||
|
||||
for.inc8:
|
||||
%inc9 = add i32 %y.023, 1
|
||||
%cmp2 = icmp ugt i32 %inc9, %a
|
||||
br i1 %cmp2, label %for.inc11, label %for.cond4.preheader, !prof !2
|
||||
|
||||
for.inc11:
|
||||
%inc12 = add i32 %x.024, 1
|
||||
%cmp = icmp ugt i32 %inc12, %a
|
||||
br i1 %cmp, label %for.end13, label %for.cond1.preheader, !prof !2
|
||||
|
||||
for.end13:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @g(i32) #1
|
||||
|
||||
!2 = metadata !{metadata !"branch_weights", i32 1, i32 4000}
|
||||
|
Loading…
x
Reference in New Issue
Block a user