From 97be1d608e0fd8f0578342932b3b8116e5028a02 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen <stoklund@2pi.dk> Date: Fri, 28 Jun 2013 22:40:43 +0000 Subject: [PATCH] Minimize precision loss when computing cyclic probabilities. Allow block frequencies to exceed 32 bits by using the new BlockFrequency division function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185236 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/BlockFrequencyImpl.h | 84 +++++++++++++--------- test/Analysis/BlockFrequencyInfo/basic.ll | 42 +++++++++++ 2 files changed, 91 insertions(+), 35 deletions(-) diff --git a/include/llvm/Analysis/BlockFrequencyImpl.h b/include/llvm/Analysis/BlockFrequencyImpl.h index c4c16016f71..c9067a0721b 100644 --- a/include/llvm/Analysis/BlockFrequencyImpl.h +++ b/include/llvm/Analysis/BlockFrequencyImpl.h @@ -85,31 +85,16 @@ class BlockFrequencyImpl { << " --> " << Freqs[BB] << "\n"); } - /// divBlockFreq - Divide BB block frequency by PROB. If Prob = 0 do nothing. - /// - void divBlockFreq(BlockT *BB, BranchProbability Prob) { - uint64_t N = Prob.getNumerator(); - assert(N && "Illegal division by zero!"); - uint64_t D = Prob.getDenominator(); - uint64_t Freq = (Freqs[BB].getFrequency() * D) / N; - - // Should we assert it? - if (Freq > UINT32_MAX) - Freq = UINT32_MAX; - - Freqs[BB] = BlockFrequency(Freq); - DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") /= (" << Prob - << ") --> " << Freqs[BB] << "\n"); - } - // All blocks in postorder. std::vector<BlockT *> POT; // Map Block -> Position in reverse-postorder list. DenseMap<BlockT *, unsigned> RPO; - // Cycle Probability for each bloch. - DenseMap<BlockT *, uint32_t> CycleProb; + // For each loop header, record the per-iteration probability of exiting the + // loop. This is the reciprocal of the expected number of loop iterations. + typedef DenseMap<BlockT*, BranchProbability> LoopExitProbMap; + LoopExitProbMap LoopExitProb; // (reverse-)postorder traversal iterators. typedef typename std::vector<BlockT *>::iterator pot_iterator; @@ -203,10 +188,13 @@ class BlockFrequencyImpl { if (!isLoopHead) return; - assert(EntryFreq >= CycleProb[BB]); - uint32_t CProb = CycleProb[BB]; - uint32_t Numerator = EntryFreq - CProb ? EntryFreq - CProb : 1; - divBlockFreq(BB, BranchProbability(Numerator, EntryFreq)); + // This block is a loop header, so boost its frequency by the expected + // number of loop iterations. The loop blocks will be revisited so they all + // get this boost. + typename LoopExitProbMap::const_iterator I = LoopExitProb.find(BB); + assert(I != LoopExitProb.end() && "Loop header missing from table"); + Freqs[BB] /= I->second; + DEBUG(dbgs() << "Loop header scaled to " << Freqs[BB] << ".\n"); } /// doLoop - Propagate block frequency down through the loop. @@ -226,24 +214,50 @@ class BlockFrequencyImpl { } // Compute loop's cyclic probability using backedges probabilities. + BlockFrequency BackFreq; for (typename GT::ChildIteratorType PI = GraphTraits< Inverse<BlockT *> >::child_begin(Head), PE = GraphTraits< Inverse<BlockT *> >::child_end(Head); PI != PE; ++PI) { BlockT *Pred = *PI; assert(Pred); - if (isBackedge(Pred, Head)) { - uint64_t N = getEdgeFreq(Pred, Head).getFrequency(); - uint64_t D = getBlockFreq(Head).getFrequency(); - assert(N <= EntryFreq && "Backedge frequency must be <= EntryFreq!"); - uint64_t Res = (N * EntryFreq) / D; - - assert(Res <= UINT32_MAX); - CycleProb[Head] += (uint32_t) Res; - DEBUG(dbgs() << " CycleProb[" << getBlockName(Head) << "] += " << Res - << " --> " << CycleProb[Head] << "\n"); - } + if (isBackedge(Pred, Head)) + BackFreq += getEdgeFreq(Pred, Head); } + + // The cyclic probability is freq(BackEdges) / freq(Head), where freq(Head) + // only counts edges entering the loop, not the loop backedges. + // The probability of leaving the loop on each iteration is: + // + // ExitProb = 1 - CyclicProb + // + // The Expected number of loop iterations is: + // + // Iterations = 1 / ExitProb + // + uint64_t D = std::max(getBlockFreq(Head).getFrequency(), 1ull); + uint64_t N = std::max(BackFreq.getFrequency(), 1ull); + if (N < D) + N = D - N; + else + // We'd expect N < D, but rounding and saturation means that can't be + // guaranteed. + N = 1; + + // Now ExitProb = N / D, make sure it fits in an i32/i32 fraction. + assert(N <= D); + if (D > UINT32_MAX) { + unsigned Shift = 32 - countLeadingZeros(D); + D >>= Shift; + N >>= Shift; + if (N == 0) + N = 1; + } + BranchProbability LEP = BranchProbability(N, D); + LoopExitProb.insert(std::make_pair(Head, LEP)); + DEBUG(dbgs() << "LoopExitProb[" << getBlockName(Head) << "] = " << LEP + << " from 1 - " << BackFreq << " / " << getBlockFreq(Head) + << ".\n"); } friend class BlockFrequencyInfo; @@ -258,7 +272,7 @@ class BlockFrequencyImpl { // Clear everything. RPO.clear(); POT.clear(); - CycleProb.clear(); + LoopExitProb.clear(); Freqs.clear(); BlockT *EntryBlock = fn->begin(); diff --git a/test/Analysis/BlockFrequencyInfo/basic.ll b/test/Analysis/BlockFrequencyInfo/basic.ll index 43dd264402d..ce29fb5ce1b 100644 --- a/test/Analysis/BlockFrequencyInfo/basic.ll +++ b/test/Analysis/BlockFrequencyInfo/basic.ll @@ -90,3 +90,45 @@ exit: } !1 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4} + +; CHECK: Printing analysis {{.*}} for function 'nested_loops' +; CHECK: entry = 1.0 +; This test doesn't seem to be assigning sensible frequencies to nested loops. +define void @nested_loops(i32 %a) { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: + %x.024 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ] + br label %for.cond4.preheader + +for.cond4.preheader: + %y.023 = phi i32 [ 0, %for.cond1.preheader ], [ %inc9, %for.inc8 ] + %add = add i32 %y.023, %x.024 + br label %for.body6 + +for.body6: + %z.022 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ] + %add7 = add i32 %add, %z.022 + tail call void @g(i32 %add7) #2 + %inc = add i32 %z.022, 1 + %cmp5 = icmp ugt i32 %inc, %a + br i1 %cmp5, label %for.inc8, label %for.body6, !prof !2 + +for.inc8: + %inc9 = add i32 %y.023, 1 + %cmp2 = icmp ugt i32 %inc9, %a + br i1 %cmp2, label %for.inc11, label %for.cond4.preheader, !prof !2 + +for.inc11: + %inc12 = add i32 %x.024, 1 + %cmp = icmp ugt i32 %inc12, %a + br i1 %cmp, label %for.end13, label %for.cond1.preheader, !prof !2 + +for.end13: + ret void +} + +declare void @g(i32) #1 + +!2 = metadata !{metadata !"branch_weights", i32 1, i32 4000}