[NaryReassociate] speeds up candidate searching

Summary: This fixes a left-over efficiency issue in D8950. As Andrew and Daniel suggested, we can store the candidates in a stack and pop the top element when it does not dominate the current instruction. This reduces the worst-case time complexity to O(n). Test Plan: a new test in nary-add.ll that exercises this optimization. Reviewers: broune, dberlin, meheff, atrick Reviewed By: atrick Subscribers: llvm-commits, sanjoy Differential Revision: http://reviews.llvm.org/D9055 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235129 91177308-0d34-0410-b5e6-96231b3b80d8
2025-06-14 14:24:05 +00:00 · 2015-04-16 18:42:31 +00:00
parent e60729f73f
commit feecc904c4
2 changed files with 67 additions and 10 deletions
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@ -105,7 +105,9 @@ private:
  ScalarEvolution *SE;
  // A lookup table quickly telling which instructions compute the given SCEV.
  // Note that there can be multiple instructions at different locations
-  // computing to the same SCEV.  For example,
+  // computing the same SCEV, so we map a SCEV to an instruction list.  For
  // example,
  //
  //   if (p1)
  //     foo(a + b);
  //   if (p2)
@ -190,17 +192,21 @@ Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr,
    return nullptr;
  auto &LHSCandidates = Pos->second;
-  unsigned NumIterations = 0;
+  // Look for the closest dominator LHS of I that computes LHSExpr, and replace
-  // Search at most 10 items to avoid running quadratically.
+  // I with LHS + RHS.
-  static const unsigned MaxNumIterations = 10;
+  //
-  for (auto LHS = LHSCandidates.rbegin();
+  // Because we traverse the dominator tree in pre-order, a
-       LHS != LHSCandidates.rend() && NumIterations < MaxNumIterations;
+  // candidate that doesn't dominate the current instruction won't dominate any
-       ++LHS, ++NumIterations) {
+  // future instruction either. Therefore, we pop it out of the stack. This
-    if (DT->dominates(*LHS, I)) {
+  // optimization makes the algorithm O(n).
-      Instruction *NewI = BinaryOperator::CreateAdd(*LHS, RHS, "", I);
+  while (!LHSCandidates.empty()) {
    Instruction *LHS = LHSCandidates.back();
    if (DT->dominates(LHS, I)) {
      Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
      NewI->takeName(I);
      return NewI;
    }
    LHSCandidates.pop_back();
  }
  return nullptr;
 }
--- a/test/Transforms/NaryReassociate/nary-add.ll
+++ b/test/Transforms/NaryReassociate/nary-add.ll
@ -1,4 +1,4 @@
-; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+; RUN: opt < %s -nary-reassociate -dce -S | FileCheck %s
 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
@ -105,6 +105,57 @@ return:
  ret void
 }
 ; This test involves more conditional reassociation candidates. It exercises
 ; the stack optimization in tryReassociatedAdd that pops the candidates that
 ; do not dominate the current instruction.
 ;
 ;       def1
 ;      cond1
 ;      /  \
 ;     /    \
 ;   cond2  use2
 ;   /  \
 ;  /    \
 ; def2  def3
 ;      cond3
 ;       /  \
 ;      /    \
 ;    def4   use1
 ;
 ; NaryReassociate should match use1 with def3, and use2 with def1.
 define void @conditional2(i32 %a, i32 %b, i32 %c, i1 %cond1, i1 %cond2, i1 %cond3) {
 entry:
 ; %def1 computes a + b in the entry block.
  %def1 = add i32 %a, %b
  br i1 %cond1, label %bb1, label %bb6
 bb1:
  br i1 %cond2, label %bb2, label %bb3
 bb2:
 ; %def2 recomputes a + b; this block returns without reaching bb5 or bb6.
  %def2 = add i32 %a, %b
  call void @foo(i32 %def2)
  ret void
 bb3:
 ; %def3 recomputes a + b on the only path that leads to bb5.
  %def3 = add i32 %a, %b
  br i1 %cond3, label %bb4, label %bb5
 bb4:
 ; %def4 recomputes a + b in the sibling of bb5; this block returns directly.
  %def4 = add i32 %a, %b
  call void @foo(i32 %def4)
  ret void
 bb5:
 ; use1: (a + c) + b. The test expects it to be rewritten as %def3 + %c.
  %0 = add i32 %a, %c
  %1 = add i32 %0, %b
 ; CHECK: [[t1:%[0-9]+]] = add i32 %def3, %c
  call void @foo(i32 %1) ; foo((a + c) + b);
 ; CHECK-NEXT: call void @foo(i32 [[t1]])
  ret void
 bb6:
 ; use2: (a + c) + b, reached straight from entry. The test expects it to be
 ; rewritten as %def1 + %c.
  %2 = add i32 %a, %c
  %3 = add i32 %2, %b
 ; CHECK: [[t2:%[0-9]+]] = add i32 %def1, %c
  call void @foo(i32 %3) ; foo((a + c) + b);
 ; CHECK-NEXT: call void @foo(i32 [[t2]])
  ret void
 }
 ; foo((a + b) + c)
 ; foo(((a + d) + b) + c)
 ;   =>