Simplify n-ary adds by reassociation

Summary:
This transformation reassociates an n-ary add so that the add can partially reuse
existing instructions. For example, this pass can simplify

  void foo(int a, int b) {
    bar(a + b);
    bar((a + 2) + b);
  }

to

  void foo(int a, int b) {
    int t = a + b;
    bar(t);
    bar(t + 2);
  }

saving one add instruction.

Fixes PR22357 (https://llvm.org/bugs/show_bug.cgi?id=22357).

Test Plan: nary-add.ll

Reviewers: broune, dberlin, hfinkel, meheff, sanjoy, atrick

Reviewed By: sanjoy, atrick

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D8950

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234855 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jingyue Wu 2015-04-14 04:59:22 +00:00
parent 35adce33f1
commit 9cecacd16a
7 changed files with 343 additions and 0 deletions

View File

@ -206,6 +206,7 @@ void initializeMergedLoadStoreMotionPass(PassRegistry &);
void initializeMetaRenamerPass(PassRegistry&);
void initializeMergeFunctionsPass(PassRegistry&);
void initializeModuleDebugInfoPrinterPass(PassRegistry&);
void initializeNaryReassociatePass(PassRegistry&);
void initializeNoAAPass(PassRegistry&);
void initializeObjCARCAliasAnalysisPass(PassRegistry&);
void initializeObjCARCAPElimPass(PassRegistry&);

View File

@ -109,6 +109,7 @@ namespace {
(void) llvm::createLowerExpectIntrinsicPass();
(void) llvm::createLowerInvokePass();
(void) llvm::createLowerSwitchPass();
(void) llvm::createNaryReassociatePass();
(void) llvm::createNoAAPass();
(void) llvm::createObjCARCAliasAnalysisPass();
(void) llvm::createObjCARCAPElimPass();

View File

@ -457,6 +457,12 @@ FunctionPass *createRewriteStatepointsForGCPass();
//
FunctionPass *createFloat2IntPass();
//===----------------------------------------------------------------------===//
//
// NaryReassociate - Simplify n-ary operations by reassociation.
//
FunctionPass *createNaryReassociatePass();
} // End llvm namespace
#endif

View File

@ -29,6 +29,7 @@ add_llvm_library(LLVMScalarOpts
LowerExpectIntrinsic.cpp
MemCpyOptimizer.cpp
MergedLoadStoreMotion.cpp
NaryReassociate.cpp
PartiallyInlineLibCalls.cpp
PlaceSafepoints.cpp
Reassociate.cpp

View File

@ -0,0 +1,206 @@
//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass reassociates n-ary add expressions and eliminates the redundancy
// exposed by the reassociation.
//
// A motivating example:
//
// void foo(int a, int b) {
// bar(a + b);
// bar((a + 2) + b);
// }
//
// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
// the above code to
//
// int t = a + b;
// bar(t);
// bar(t + 2);
//
// However, the Reassociate pass is unable to do that because it processes each
// instruction individually and believes (a + 2) + b is the best form according
// to its rank system.
//
// To address this limitation, NaryReassociate reassociates an expression in a
// form that reuses existing instructions. As a result, NaryReassociate can
// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that
// (a + b) is computed before.
//
// NaryReassociate works as follows. For every instruction in the form of (a +
// b) + c, it checks whether a + c or b + c is already computed by a dominating
// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b +
// c) + a respectively. To efficiently look up whether an expression is
// computed before, we store each instruction seen and its SCEV into an
// SCEV-to-instruction map.
//
// Although the algorithm pattern-matches only ternary additions, it
// automatically handles many >3-ary expressions by walking through the function
// in the depth-first order. For example, given
//
// (a + c) + d
// ((a + b) + c) + d
//
// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
// ((a + c) + b) + d into ((a + c) + d) + b.
//
// Limitations and TODO items:
//
// 1) We only consider n-ary adds for now. This should be extended and
// generalized.
//
// 2) Besides arithmetic operations, similar reassociation can be applied to
// GEPs. For example, if
// X = &arr[a]
// dominates
// Y = &arr[a + b]
// we may rewrite Y into X + b.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "nary-reassociate"
namespace {
// Function pass that rewrites an add of the form (a + b) + c so that it reuses
// an equivalent partial sum already computed by a dominating add.
class NaryReassociate : public FunctionPass {
public:
  static char ID;

  NaryReassociate() : FunctionPass(ID) {
    initializeNaryReassociatePass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Analyses consumed by runOnFunction.
    AU.addRequired<DominatorTreeWrapperPass>();
    // TODO: can we preserve ScalarEvolution?
    AU.addRequired<ScalarEvolution>();
    // The pass only rewrites adds, so the CFG and the dominator tree are
    // declared preserved.
    AU.setPreservesCFG();
    AU.addPreserved<DominatorTreeWrapperPass>();
  }

private:
  // Tries to reassociate the add I into a form that reuses an existing
  // instruction. Returns the replacement, or nullptr if I is left alone.
  Instruction *tryReassociateAdd(Instruction *I);

  // Worker for the overload above: treats I as LHS + RHS, with the two
  // operands handed in explicitly so the caller can try both orders.
  Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I);

  // If an instruction computing the SCEV LHS has been seen and dominates I,
  // creates and returns that instruction + RHS; otherwise returns nullptr.
  Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I);

  // Analyses fetched at the start of every runOnFunction call.
  DominatorTree *DT;
  ScalarEvolution *SE;

  // Maps a SCEV to the instructions seen so far that compute it. One SCEV can
  // map to several instructions at different program points, e.g.
  //   if (p1)
  //     foo(a + b);
  //   if (p2)
  //     bar(a + b);
  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs;
};
} // anonymous namespace
// Storage for the static ID member that the constructor hands to FunctionPass.
char NaryReassociate::ID = 0;
// Register the pass under the name "nary-reassociate" and declare the two
// analyses it requires in getAnalysisUsage as initialization dependencies.
INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
false, false)
// Public factory: returns a newly allocated NaryReassociate pass.
FunctionPass *llvm::createNaryReassociatePass() {
  FunctionPass *Pass = new NaryReassociate();
  return Pass;
}
// Runs n-ary reassociation over F.
//
// Every add is visited once, walking the dominator tree depth-first. When
// tryReassociateAdd produces a replacement, all uses of the original add are
// redirected to it and the original is erased. Each add that remains
// (rewritten or not) is then recorded in SeenExprs under its SCEV so that adds
// visited later can reuse it.
//
// Returns true iff at least one instruction was rewritten; false if the
// function is skipped or nothing changed.
bool NaryReassociate::runOnFunction(Function &F) {
  if (skipOptnoneFunction(F))
    return false;

  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  SE = &getAnalysis<ScalarEvolution>();

  // Traverse the dominator tree in the depth-first order, so that the adds
  // dominating an instruction have already been recorded in SeenExprs by the
  // time that instruction is visited.
  bool Changed = false;
  SeenExprs.clear();
  for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
       Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
    BasicBlock *BB = Node->getBlock();
    for (auto I = BB->begin(); I != BB->end(); ++I) {
      if (I->getOpcode() == Instruction::Add) {
        if (Instruction *NewI = tryReassociateAdd(I)) {
          I->replaceAllUsesWith(NewI);
          I->eraseFromParent();
          // Continue from the replacement; the original is gone.
          I = NewI;
          // The IR was modified, so the pass must report a change.
          Changed = true;
        }
        // We should add the rewritten instruction because tryReassociateAdd may
        // have invalidated the original one.
        SeenExprs[SE->getSCEV(I)].push_back(I);
      }
    }
  }
  return Changed;
}
// Tries to reassociate the add I, first treating operand 0 as the nested add
// and then operand 1. Returns the replacement instruction from the first
// attempt that succeeds, or nullptr if neither does.
Instruction *NaryReassociate::tryReassociateAdd(Instruction *I) {
  Value *Op0 = I->getOperand(0);
  Value *Op1 = I->getOperand(1);
  Instruction *Rewritten = tryReassociateAdd(Op0, Op1, I);
  if (!Rewritten)
    Rewritten = tryReassociateAdd(Op1, Op0, I);
  return Rewritten;
}
// Treats I as LHS + RHS and, when LHS is itself an add A + B with a single
// use, tries to rewrite I as (A + RHS) + B or (B + RHS) + A, in that order.
// Returns the replacement instruction, or nullptr if no rewrite applies.
Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS,
                                                Instruction *I) {
  Value *A = nullptr, *B = nullptr;
  // To be conservative, only reassociate when I is the sole user of A + B.
  if (!LHS->hasOneUse() || !match(LHS, m_Add(m_Value(A), m_Value(B))))
    return nullptr;

  const SCEV *AExpr = SE->getSCEV(A);
  const SCEV *BExpr = SE->getSCEV(B);
  const SCEV *RHSExpr = SE->getSCEV(RHS);

  // I = (A + B) + RHS; first attempt (A + RHS) + B.
  if (Instruction *NewI =
          tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I))
    return NewI;

  // Otherwise attempt (B + RHS) + A; this yields nullptr when it fails too.
  return tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I);
}
// Looks for a previously seen instruction that computes LHSExpr and dominates
// I. If one is found, inserts "that instruction + RHS" right before I, moves
// I's name onto the new add, and returns it. Returns nullptr otherwise.
Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr,
                                                 Value *RHS, Instruction *I) {
  auto Found = SeenExprs.find(LHSExpr);
  // Nothing computing LHSExpr has been recorded yet.
  if (Found == SeenExprs.end())
    return nullptr;

  // Look at no more than 10 candidates to avoid running quadratically.
  static const unsigned MaxNumIterations = 10;
  auto &Candidates = Found->second;
  unsigned NumVisited = 0;
  // Scan from the most recently recorded candidate backwards.
  for (size_t Idx = Candidates.size();
       Idx != 0 && NumVisited != MaxNumIterations; --Idx, ++NumVisited) {
    Instruction *Candidate = Candidates[Idx - 1];
    if (!DT->dominates(Candidate, I))
      continue;
    Instruction *NewI = BinaryOperator::CreateAdd(Candidate, RHS, "", I);
    NewI->takeName(I);
    return NewI;
  }
  return nullptr;
}

View File

@ -59,6 +59,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
initializeMergedLoadStoreMotionPass(Registry);
initializeNaryReassociatePass(Registry);
initializePartiallyInlineLibCallsPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);

View File

@ -0,0 +1,127 @@
; RUN: opt < %s -nary-reassociate -S | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
declare void @foo(i32 %a)
; foo(a + c);
; foo(a + (b + c));
; =>
; t = a + c;
; foo(t);
; foo(t + b);
define void @left_reassociate(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @left_reassociate(
; %1 = a + c is the base expression that the second sum should reuse.
%1 = add i32 %a, %c
; CHECK: [[BASE:%[a-zA-Z0-9]+]] = add i32 %a, %c
call void @foo(i32 %1)
; %3 = a + (b + c): the nested add %2 is the right operand of %3 and has %3 as
; its only user. The expected output adds %b to the base instead.
%2 = add i32 %b, %c
%3 = add i32 %a, %2
; CHECK: add i32 [[BASE]], %b
call void @foo(i32 %3)
ret void
}
; foo(a + c);
; foo((a + b) + c);
; =>
; t = a + c;
; foo(t);
; foo(t + b);
define void @right_reassociate(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @right_reassociate(
; %1 = a + c is the base expression that the second sum should reuse.
%1 = add i32 %a, %c
; CHECK: [[BASE:%[a-zA-Z0-9]+]] = add i32 %a, %c
call void @foo(i32 %1)
; %3 = (a + b) + c: the nested add %2 is the left operand of %3 and has %3 as
; its only user. The expected output adds %b to the base instead.
%2 = add i32 %a, %b
%3 = add i32 %2, %c
; CHECK: add i32 [[BASE]], %b
call void @foo(i32 %3)
ret void
}
; t1 = a + c;
; foo(t1);
; t2 = a + b;
; foo(t2);
; t3 = t2 + c;
; foo(t3);
;
; Do not rewrite t3 into t1 + b because t2 is used elsewhere and is likely free.
define void @no_reassociate(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @no_reassociate(
; %1 = a + c would be a reusable base for %3 below.
%1 = add i32 %a, %c
; CHECK: add i32 %a, %c
call void @foo(i32 %1)
; %2 = a + b has a second user (this call) besides %3, so the single-use
; condition on the nested add does not hold.
%2 = add i32 %a, %b
; CHECK: add i32 %a, %b
call void @foo(i32 %2)
; %3 is therefore expected to stay in its original (a + b) + c form.
%3 = add i32 %2, %c
; CHECK: add i32 %2, %c
call void @foo(i32 %3)
ret void
}
; if (p1)
; foo(a + c);
; if (p2)
; foo(a + c);
; if (p3)
; foo((a + b) + c);
;
; No action because (a + c) does not dominate ((a + b) + c).
define void @conditional(i1 %p1, i1 %p2, i1 %p3, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @conditional(
entry:
br i1 %p1, label %then1, label %branch1
; First a + c, computed only when %p1 is true.
then1:
%0 = add i32 %a, %c
; CHECK: add i32 %a, %c
call void @foo(i32 %0)
br label %branch1
branch1:
br i1 %p2, label %then2, label %branch2
; Second a + c, computed only when %p2 is true.
then2:
%1 = add i32 %a, %c
; CHECK: add i32 %a, %c
call void @foo(i32 %1)
br label %branch2
branch2:
br i1 %p3, label %then3, label %return
; (a + b) + c sits in a block that neither a + c above dominates, so both adds
; here are expected to be left unchanged.
then3:
%2 = add i32 %a, %b
; CHECK: %2 = add i32 %a, %b
%3 = add i32 %2, %c
; CHECK: add i32 %2, %c
call void @foo(i32 %3)
br label %return
return:
ret void
}
; foo((a + b) + c)
; foo(((a + d) + b) + c)
; =>
; t = (a + b) + c;
; foo(t);
; foo(t + d);
define void @quaternary(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @quaternary(
; %2 = (a + b) + c is the three-operand sum that the four-operand sum below is
; expected to reuse.
%1 = add i32 %a, %b
%2 = add i32 %1, %c
call void @foo(i32 %2)
; CHECK: call void @foo(i32 [[TMP1:%[a-zA-Z0-9]+]])
; %5 = ((a + d) + b) + c is expected to end up as (the value passed to the
; first call) + d.
%3 = add i32 %a, %d
%4 = add i32 %3, %b
%5 = add i32 %4, %c
; CHECK: [[TMP2:%[a-zA-Z0-9]+]] = add i32 [[TMP1]], %d
call void @foo(i32 %5)
; CHECK: call void @foo(i32 [[TMP2]])
ret void
}