mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-14 14:24:05 +00:00
[NaryReassociate] speeds up candidate searching
Summary:
This fixes a left-over efficiency issue in D8950. As Andrew and Daniel suggested, we can store the candidates in a stack and pop the top element when it does not dominate the current instruction. This reduces the worst-case time complexity to O(n).

Test Plan: a new test in nary-add.ll that exercises this optimization.

Reviewers: broune, dberlin, meheff, atrick

Reviewed By: atrick

Subscribers: llvm-commits, sanjoy

Differential Revision: http://reviews.llvm.org/D9055

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235129 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -105,7 +105,9 @@ private:
|
|||||||
ScalarEvolution *SE;
|
ScalarEvolution *SE;
|
||||||
// A lookup table quickly telling which instructions compute the given SCEV.
|
// A lookup table quickly telling which instructions compute the given SCEV.
|
||||||
// Note that there can be multiple instructions at different locations
|
// Note that there can be multiple instructions at different locations
|
||||||
// computing to the same SCEV. For example,
|
// computing to the same SCEV, so we map a SCEV to an instruction list. For
|
||||||
|
// example,
|
||||||
|
//
|
||||||
// if (p1)
|
// if (p1)
|
||||||
// foo(a + b);
|
// foo(a + b);
|
||||||
// if (p2)
|
// if (p2)
|
||||||
@ -190,17 +192,21 @@ Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
auto &LHSCandidates = Pos->second;
|
auto &LHSCandidates = Pos->second;
|
||||||
unsigned NumIterations = 0;
|
// Look for the closest dominator LHS of I that computes LHSExpr, and replace
|
||||||
// Search at most 10 items to avoid running quadratically.
|
// I with LHS + RHS.
|
||||||
static const unsigned MaxNumIterations = 10;
|
//
|
||||||
for (auto LHS = LHSCandidates.rbegin();
|
// Because we traverse the dominator tree in the pre-order, a
|
||||||
LHS != LHSCandidates.rend() && NumIterations < MaxNumIterations;
|
// candidate that doesn't dominate the current instruction won't dominate any
|
||||||
++LHS, ++NumIterations) {
|
// future instruction either. Therefore, we pop it out of the stack. This
|
||||||
if (DT->dominates(*LHS, I)) {
|
// optimization makes the algorithm O(n).
|
||||||
Instruction *NewI = BinaryOperator::CreateAdd(*LHS, RHS, "", I);
|
while (!LHSCandidates.empty()) {
|
||||||
|
Instruction *LHS = LHSCandidates.back();
|
||||||
|
if (DT->dominates(LHS, I)) {
|
||||||
|
Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
|
||||||
NewI->takeName(I);
|
NewI->takeName(I);
|
||||||
return NewI;
|
return NewI;
|
||||||
}
|
}
|
||||||
|
LHSCandidates.pop_back();
|
||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: opt < %s -nary-reassociate -S | FileCheck %s
|
; RUN: opt < %s -nary-reassociate -dce -S | FileCheck %s
|
||||||
|
|
||||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||||
|
|
||||||
@ -105,6 +105,57 @@ return:
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; This test involves more conditional reassociation candidates. It exercises
|
||||||
|
; the stack optimization in tryReassociatedAdd that pops the candidates that
|
||||||
|
; do not dominate the current instruction.
|
||||||
|
;
|
||||||
|
; def1
|
||||||
|
; cond1
|
||||||
|
; / \
|
||||||
|
; / \
|
||||||
|
; cond2 use2
|
||||||
|
; / \
|
||||||
|
; / \
|
||||||
|
; def2 def3
|
||||||
|
; cond3
|
||||||
|
; / \
|
||||||
|
; / \
|
||||||
|
; def4 use1
|
||||||
|
;
|
||||||
|
; NaryReassociate should match use1 with def3, and use2 with def1.
|
||||||
|
define void @conditional2(i32 %a, i32 %b, i32 %c, i1 %cond1, i1 %cond2, i1 %cond3) {
|
||||||
|
entry:
|
||||||
|
%def1 = add i32 %a, %b
|
||||||
|
br i1 %cond1, label %bb1, label %bb6
|
||||||
|
bb1:
|
||||||
|
br i1 %cond2, label %bb2, label %bb3
|
||||||
|
bb2:
|
||||||
|
%def2 = add i32 %a, %b
|
||||||
|
call void @foo(i32 %def2)
|
||||||
|
ret void
|
||||||
|
bb3:
|
||||||
|
%def3 = add i32 %a, %b
|
||||||
|
br i1 %cond3, label %bb4, label %bb5
|
||||||
|
bb4:
|
||||||
|
%def4 = add i32 %a, %b
|
||||||
|
call void @foo(i32 %def4)
|
||||||
|
ret void
|
||||||
|
bb5:
|
||||||
|
%0 = add i32 %a, %c
|
||||||
|
%1 = add i32 %0, %b
|
||||||
|
; CHECK: [[t1:%[0-9]+]] = add i32 %def3, %c
|
||||||
|
call void @foo(i32 %1) ; foo((a + c) + b);
|
||||||
|
; CHECK-NEXT: call void @foo(i32 [[t1]])
|
||||||
|
ret void
|
||||||
|
bb6:
|
||||||
|
%2 = add i32 %a, %c
|
||||||
|
%3 = add i32 %2, %b
|
||||||
|
; CHECK: [[t2:%[0-9]+]] = add i32 %def1, %c
|
||||||
|
call void @foo(i32 %3) ; foo((a + c) + b);
|
||||||
|
; CHECK-NEXT: call void @foo(i32 [[t2]])
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; foo((a + b) + c)
|
; foo((a + b) + c)
|
||||||
; foo(((a + d) + b) + c)
|
; foo(((a + d) + b) + c)
|
||||||
; =>
|
; =>
|
||||||
|
Reference in New Issue
Block a user