[LCG] Implement Tarjan's algorithm correctly this time. We have to walk

up the stack finishing the exploration of each entries children before we're finished in addition to accounting for their low-links. Added a unittest that really hammers home the need for this with interlocking cycles that would each appear distinct otherwise and crash or compute the wrong result. As part of this, nuke a stale fixme and bring the rest of the implementation still more closely in line with the original algorithm. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206966 91177308-0d34-0410-b5e6-96231b3b80d8
2024-07-08 18:30:04 +00:00 · 2014-04-23 10:31:17 +00:00 · 2014-04-23 10:31:17 +00:00 · b9619110af
commit b9619110af
parent 57683b8aba
3 changed files with 100 additions and 34 deletions
--- a/include/llvm/Analysis/LazyCallGraph.h
+++ b/include/llvm/Analysis/LazyCallGraph.h
@ -381,7 +381,8 @@ private:
  /// \brief Helper to form a new SCC out of the top of a DFSStack-like
  /// structure.
  SCC *formSCCFromDFSStack(
-      SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack);
+      SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack,
      SmallVectorImpl<std::pair<Node *, Node::iterator>>::iterator SCCBegin);
  /// \brief Retrieve the next node in the post-order SCC walk of the call graph.
  SCC *getNextSCCInPostOrder();
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@ -152,27 +152,26 @@ void LazyCallGraph::updateGraphPtrs() {
 }
 LazyCallGraph::SCC *LazyCallGraph::formSCCFromDFSStack(
-    SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack) {
+    SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack,
    SmallVectorImpl<std::pair<Node *, Node::iterator>>::iterator SCCBegin) {
  // The tail of the stack is the new SCC. Allocate the SCC and pop the stack
  // into it.
  SCC *NewSCC = new (SCCBPA.Allocate()) SCC();
-  // Because we don't follow the strict Tarjan recursive formulation, walk
+  for (auto I = SCCBegin, E = DFSStack.end(); I != E; ++I) {
-  // from the top of the stack down, propagating the lowest link and stopping
+    Node *SCCN = I->first;
-  // when the DFS number is the lowest link.
+    assert(SCCN->LowLink >= SCCBegin->first->LowLink &&
-  int LowestLink = DFSStack.back().first->LowLink;
+           "We cannot have a low link in an SCC lower than its root on the "
-  do {
+           "stack!");
-    Node *SCCN = DFSStack.pop_back_val().first;
+
    SCCMap[&SCCN->getFunction()] = NewSCC;
    NewSCC->Nodes.push_back(SCCN);
    LowestLink = std::min(LowestLink, SCCN->LowLink);
    bool Inserted =
        NewSCC->NodeSet.insert(&SCCN->getFunction());
    (void)Inserted;
    assert(Inserted && "Cannot have duplicates in the DFSStack!");
-  } while (!DFSStack.empty() && LowestLink <= DFSStack.back().first->DFSNumber);
+  }
-  assert(LowestLink == NewSCC->Nodes.back()->DFSNumber &&
+  DFSStack.erase(SCCBegin, DFSStack.end());
         "Cannot stop with a DFS number greater than the lowest link!");
  // A final pass over all edges in the SCC (this remains linear as we only
  // do this once when we build the SCC) to connect it to the parent sets of
@ -209,36 +208,45 @@ LazyCallGraph::SCC *LazyCallGraph::getNextSCCInPostOrder() {
    DFSStack.push_back(std::make_pair(N, N->begin()));
  }
-  Node *N = DFSStack.back().first;
+  auto SI = DFSStack.rbegin();
-  if (N->DFSNumber == 0) {
+  if (SI->first->DFSNumber == 0) {
    // This node hasn't been visited before, assign it a DFS number and remove
    // it from the entry set.
-    N->LowLink = N->DFSNumber = NextDFSNumber++;
+    SI->first->LowLink = SI->first->DFSNumber = NextDFSNumber++;
-    SCCEntryNodes.remove(&N->getFunction());
+    SCCEntryNodes.remove(&SI->first->getFunction());
  }
-  for (auto I = DFSStack.back().second, E = N->end(); I != E; ++I) {
+  do {
-    Node *ChildN = *I;
+    Node *N = SI->first;
-    if (ChildN->DFSNumber == 0) {
+    for (auto I = SI->second, E = N->end(); I != E; ++I) {
-      // Mark that we should start at this child when next this node is the
+      Node *ChildN = *I;
-      // top of the stack. We don't start at the next child to ensure this
+      if (ChildN->DFSNumber == 0) {
-      // child's lowlink is reflected.
+        // Mark that we should start at this child when next this node is the
-      // FIXME: I don't actually think this is required, and we could start
+        // top of the stack. We don't start at the next child to ensure this
-      // at the next child.
+        // child's lowlink is reflected.
-      DFSStack.back().second = I;
+        SI->second = I;
-      // Recurse onto this node via a tail call.
+        // Recurse onto this node via a tail call.
-      DFSStack.push_back(std::make_pair(ChildN, ChildN->begin()));
+        DFSStack.push_back(std::make_pair(ChildN, ChildN->begin()));
-      return LazyCallGraph::getNextSCCInPostOrder();
+        return LazyCallGraph::getNextSCCInPostOrder();
      }
      // Track the lowest link of the childen, if any are still in the stack.
      if (ChildN->LowLink < N->LowLink && !SCCMap.count(&ChildN->getFunction()))
        N->LowLink = ChildN->LowLink;
    }
    // No more children to process for this stack entry.
    SI->second = N->end();
-    // Track the lowest link of the childen, if any are still in the stack.
+    if (N->LowLink == N->DFSNumber)
-    if (ChildN->LowLink < N->LowLink && !SCCMap.count(&ChildN->getFunction()))
+      // Form the new SCC out of the top of the DFS stack.
-      N->LowLink = ChildN->LowLink;
+      return formSCCFromDFSStack(DFSStack, std::prev(SI.base()));
  }
-  // Form the new SCC out of the top of the DFS stack.
+    ++SI;
-  return formSCCFromDFSStack(DFSStack);
+  } while (SI != DFSStack.rend());
  llvm_unreachable(
      "We cannot reach the bottom of the stack without popping an SCC.");
 }
 char LazyCallGraphAnalysis::PassID;
--- a/unittests/Analysis/LazyCallGraphTest.cpp
+++ b/unittests/Analysis/LazyCallGraphTest.cpp
@ -248,4 +248,61 @@ TEST(LazyCallGraphTest, BasicGraphFormation) {
  EXPECT_EQ(CG.postorder_scc_end(), SCCI);
 }
 static Function &lookupFunction(Module &M, StringRef Name) {
  for (Function &F : M)
    if (F.getName() == Name)
      return F;
  report_fatal_error("Couldn't find function!");
 }
 TEST(LazyCallGraphTest, MultiArmSCC) {
  // Two interlocking cycles. The really useful thing about this SCC is that it
  // will require Tarjan's DFS to backtrack and finish processing all of the
  // children of each node in the SCC.
  std::unique_ptr<Module> M = parseAssembly(
      "define void @a() {\n"
      "entry:\n"
      "  call void @b()\n"
      "  call void @d()\n"
      "  ret void\n"
      "}\n"
      "define void @b() {\n"
      "entry:\n"
      "  call void @c()\n"
      "  ret void\n"
      "}\n"
      "define void @c() {\n"
      "entry:\n"
      "  call void @a()\n"
      "  ret void\n"
      "}\n"
      "define void @d() {\n"
      "entry:\n"
      "  call void @e()\n"
      "  ret void\n"
      "}\n"
      "define void @e() {\n"
      "entry:\n"
      "  call void @a()\n"
      "  ret void\n"
      "}\n");
  LazyCallGraph CG(*M);
  // Force the graph to be fully expanded.
  auto SCCI = CG.postorder_scc_begin();
  LazyCallGraph::SCC *SCC = *SCCI++;
  EXPECT_EQ(CG.postorder_scc_end(), SCCI);
  LazyCallGraph::Node *A = CG.lookup(lookupFunction(*M, "a"));
  LazyCallGraph::Node *B = CG.lookup(lookupFunction(*M, "b"));
  LazyCallGraph::Node *C = CG.lookup(lookupFunction(*M, "c"));
  LazyCallGraph::Node *D = CG.lookup(lookupFunction(*M, "d"));
  LazyCallGraph::Node *E = CG.lookup(lookupFunction(*M, "e"));
  EXPECT_EQ(SCC, CG.lookupSCC(A->getFunction()));
  EXPECT_EQ(SCC, CG.lookupSCC(B->getFunction()));
  EXPECT_EQ(SCC, CG.lookupSCC(C->getFunction()));
  EXPECT_EQ(SCC, CG.lookupSCC(D->getFunction()));
  EXPECT_EQ(SCC, CG.lookupSCC(E->getFunction()));
 }
 }