From 97be1d608e0fd8f0578342932b3b8116e5028a02 Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen <stoklund@2pi.dk>
Date: Fri, 28 Jun 2013 22:40:43 +0000
Subject: [PATCH] Minimize precision loss when computing cyclic probabilities.

Allow block frequencies to exceed 32 bits by using the new
BlockFrequency division function.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185236 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Analysis/BlockFrequencyImpl.h | 84 +++++++++++++---------
 test/Analysis/BlockFrequencyInfo/basic.ll  | 42 +++++++++++
 2 files changed, 91 insertions(+), 35 deletions(-)

diff --git a/include/llvm/Analysis/BlockFrequencyImpl.h b/include/llvm/Analysis/BlockFrequencyImpl.h
index c4c16016f71..c9067a0721b 100644
--- a/include/llvm/Analysis/BlockFrequencyImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyImpl.h
@@ -85,31 +85,16 @@ class BlockFrequencyImpl {
                  << " --> " << Freqs[BB] << "\n");
   }
 
-  /// divBlockFreq - Divide BB block frequency by PROB. If Prob = 0 do nothing.
-  ///
-  void divBlockFreq(BlockT *BB, BranchProbability Prob) {
-    uint64_t N = Prob.getNumerator();
-    assert(N && "Illegal division by zero!");
-    uint64_t D = Prob.getDenominator();
-    uint64_t Freq = (Freqs[BB].getFrequency() * D) / N;
-
-    // Should we assert it?
-    if (Freq > UINT32_MAX)
-      Freq = UINT32_MAX;
-
-    Freqs[BB] = BlockFrequency(Freq);
-    DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") /= (" << Prob
-                 << ") --> " << Freqs[BB] << "\n");
-  }
-
   // All blocks in postorder.
   std::vector<BlockT *> POT;
 
   // Map Block -> Position in reverse-postorder list.
   DenseMap<BlockT *, unsigned> RPO;
 
-  // Cycle Probability for each bloch.
-  DenseMap<BlockT *, uint32_t> CycleProb;
+  // For each loop header, record the per-iteration probability of exiting the
+  // loop. This is the reciprocal of the expected number of loop iterations.
+  typedef DenseMap<BlockT*, BranchProbability> LoopExitProbMap;
+  LoopExitProbMap LoopExitProb;
 
   // (reverse-)postorder traversal iterators.
   typedef typename std::vector<BlockT *>::iterator pot_iterator;
@@ -203,10 +188,13 @@ class BlockFrequencyImpl {
     if (!isLoopHead)
       return;
 
-    assert(EntryFreq >= CycleProb[BB]);
-    uint32_t CProb = CycleProb[BB];
-    uint32_t Numerator = EntryFreq - CProb ? EntryFreq - CProb : 1;
-    divBlockFreq(BB, BranchProbability(Numerator, EntryFreq));
+    // This block is a loop header, so boost its frequency by the expected
+    // number of loop iterations. The loop blocks will be revisited so they all
+    // get this boost.
+    typename LoopExitProbMap::const_iterator I = LoopExitProb.find(BB);
+    assert(I != LoopExitProb.end() && "Loop header missing from table");
+    Freqs[BB] /= I->second;
+    DEBUG(dbgs() << "Loop header scaled to " << Freqs[BB] << ".\n");
   }
 
   /// doLoop - Propagate block frequency down through the loop.
@@ -226,24 +214,50 @@ class BlockFrequencyImpl {
     }
 
     // Compute loop's cyclic probability using backedges probabilities.
+    BlockFrequency BackFreq;
     for (typename GT::ChildIteratorType
          PI = GraphTraits< Inverse<BlockT *> >::child_begin(Head),
          PE = GraphTraits< Inverse<BlockT *> >::child_end(Head);
          PI != PE; ++PI) {
       BlockT *Pred = *PI;
       assert(Pred);
-      if (isBackedge(Pred, Head)) {
-        uint64_t N = getEdgeFreq(Pred, Head).getFrequency();
-        uint64_t D = getBlockFreq(Head).getFrequency();
-        assert(N <= EntryFreq && "Backedge frequency must be <= EntryFreq!");
-        uint64_t Res = (N * EntryFreq) / D;
-
-        assert(Res <= UINT32_MAX);
-        CycleProb[Head] += (uint32_t) Res;
-        DEBUG(dbgs() << "  CycleProb[" << getBlockName(Head) << "] += " << Res
-                     << " --> " << CycleProb[Head] << "\n");
-      }
+      if (isBackedge(Pred, Head))
+        BackFreq += getEdgeFreq(Pred, Head);
     }
+
+    // The cyclic probability is freq(BackEdges) / freq(Head), where freq(Head)
+    // only counts edges entering the loop, not the loop backedges.
+    // The probability of leaving the loop on each iteration is:
+    //
+    //   ExitProb = 1 - CyclicProb
+    //
+    // The Expected number of loop iterations is:
+    //
+    //   Iterations = 1 / ExitProb
+    //
+    uint64_t D = std::max(getBlockFreq(Head).getFrequency(), 1ull);
+    uint64_t N = std::max(BackFreq.getFrequency(), 1ull);
+    if (N < D)
+      N = D - N;
+    else
+      // We'd expect N < D, but rounding and saturation means that can't be
+      // guaranteed.
+      N = 1;
+
+    // Now ExitProb = N / D, make sure it fits in an i32/i32 fraction.
+    assert(N <= D);
+    if (D > UINT32_MAX) {
+      unsigned Shift = 32 - countLeadingZeros(D);
+      D >>= Shift;
+      N >>= Shift;
+      if (N == 0)
+        N = 1;
+    }
+    BranchProbability LEP = BranchProbability(N, D);
+    LoopExitProb.insert(std::make_pair(Head, LEP));
+    DEBUG(dbgs() << "LoopExitProb[" << getBlockName(Head) << "] = " << LEP
+                 << " from 1 - " << BackFreq << " / " << getBlockFreq(Head)
+                 << ".\n");
   }
 
   friend class BlockFrequencyInfo;
@@ -258,7 +272,7 @@ class BlockFrequencyImpl {
     // Clear everything.
     RPO.clear();
     POT.clear();
-    CycleProb.clear();
+    LoopExitProb.clear();
     Freqs.clear();
 
     BlockT *EntryBlock = fn->begin();
diff --git a/test/Analysis/BlockFrequencyInfo/basic.ll b/test/Analysis/BlockFrequencyInfo/basic.ll
index 43dd264402d..ce29fb5ce1b 100644
--- a/test/Analysis/BlockFrequencyInfo/basic.ll
+++ b/test/Analysis/BlockFrequencyInfo/basic.ll
@@ -90,3 +90,45 @@ exit:
 }
 
 !1 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4}
+
+; CHECK: Printing analysis {{.*}} for function 'nested_loops'
+; CHECK: entry = 1.0
+; This test doesn't seem to be assigning sensible frequencies to nested loops.
+define void @nested_loops(i32 %a) {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %x.024 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
+  br label %for.cond4.preheader
+
+for.cond4.preheader:
+  %y.023 = phi i32 [ 0, %for.cond1.preheader ], [ %inc9, %for.inc8 ]
+  %add = add i32 %y.023, %x.024
+  br label %for.body6
+
+for.body6:
+  %z.022 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
+  %add7 = add i32 %add, %z.022
+  tail call void @g(i32 %add7) #2
+  %inc = add i32 %z.022, 1
+  %cmp5 = icmp ugt i32 %inc, %a
+  br i1 %cmp5, label %for.inc8, label %for.body6, !prof !2
+
+for.inc8:
+  %inc9 = add i32 %y.023, 1
+  %cmp2 = icmp ugt i32 %inc9, %a
+  br i1 %cmp2, label %for.inc11, label %for.cond4.preheader, !prof !2
+
+for.inc11:
+  %inc12 = add i32 %x.024, 1
+  %cmp = icmp ugt i32 %inc12, %a
+  br i1 %cmp, label %for.end13, label %for.cond1.preheader, !prof !2
+
+for.end13:
+  ret void
+}
+
+declare void @g(i32) #1
+
+!2 = metadata !{metadata !"branch_weights", i32 1, i32 4000}