mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-01 15:11:24 +00:00
Simplify n-ary adds by reassociation
Summary: This transformation reassociates an n-ary add so that the add can partially reuse existing instructions. For example, this pass can simplify void foo(int a, int b) { bar(a + b); bar((a + 2) + b); } to void foo(int a, int b) { int t = a + b; bar(t); bar(t + 2); } saving one add instruction. Fixes PR22357 (https://llvm.org/bugs/show_bug.cgi?id=22357). Test Plan: nary-add.ll Reviewers: broune, dberlin, hfinkel, meheff, sanjoy, atrick Reviewed By: sanjoy, atrick Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D8950 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234855 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
35adce33f1
commit
9cecacd16a
@ -206,6 +206,7 @@ void initializeMergedLoadStoreMotionPass(PassRegistry &);
|
||||
void initializeMetaRenamerPass(PassRegistry&);
|
||||
void initializeMergeFunctionsPass(PassRegistry&);
|
||||
void initializeModuleDebugInfoPrinterPass(PassRegistry&);
|
||||
void initializeNaryReassociatePass(PassRegistry&);
|
||||
void initializeNoAAPass(PassRegistry&);
|
||||
void initializeObjCARCAliasAnalysisPass(PassRegistry&);
|
||||
void initializeObjCARCAPElimPass(PassRegistry&);
|
||||
|
@ -109,6 +109,7 @@ namespace {
|
||||
(void) llvm::createLowerExpectIntrinsicPass();
|
||||
(void) llvm::createLowerInvokePass();
|
||||
(void) llvm::createLowerSwitchPass();
|
||||
(void) llvm::createNaryReassociatePass();
|
||||
(void) llvm::createNoAAPass();
|
||||
(void) llvm::createObjCARCAliasAnalysisPass();
|
||||
(void) llvm::createObjCARCAPElimPass();
|
||||
|
@ -457,6 +457,12 @@ FunctionPass *createRewriteStatepointsForGCPass();
|
||||
//
|
||||
FunctionPass *createFloat2IntPass();
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// NaryReassociate - Simplify n-ary operations by reassociation.
|
||||
//
|
||||
FunctionPass *createNaryReassociatePass();
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
||||
|
@ -29,6 +29,7 @@ add_llvm_library(LLVMScalarOpts
|
||||
LowerExpectIntrinsic.cpp
|
||||
MemCpyOptimizer.cpp
|
||||
MergedLoadStoreMotion.cpp
|
||||
NaryReassociate.cpp
|
||||
PartiallyInlineLibCalls.cpp
|
||||
PlaceSafepoints.cpp
|
||||
Reassociate.cpp
|
||||
|
206
lib/Transforms/Scalar/NaryReassociate.cpp
Normal file
206
lib/Transforms/Scalar/NaryReassociate.cpp
Normal file
@ -0,0 +1,206 @@
|
||||
//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass reassociates n-ary add expressions and eliminates the redundancy
|
||||
// exposed by the reassociation.
|
||||
//
|
||||
// A motivating example:
|
||||
//
|
||||
// void foo(int a, int b) {
|
||||
// bar(a + b);
|
||||
// bar((a + 2) + b);
|
||||
// }
|
||||
//
|
||||
// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
|
||||
// the above code to
|
||||
//
|
||||
// int t = a + b;
|
||||
// bar(t);
|
||||
// bar(t + 2);
|
||||
//
|
||||
// However, the Reassociate pass is unable to do that because it processes each
|
||||
// instruction individually and believes (a + 2) + b is the best form according
|
||||
// to its rank system.
|
||||
//
|
||||
// To address this limitation, NaryReassociate reassociates an expression in a
|
||||
// form that reuses existing instructions. As a result, NaryReassociate can
|
||||
// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that
|
||||
// (a + b) is computed before.
|
||||
//
|
||||
// NaryReassociate works as follows. For every instruction in the form of (a +
|
||||
// b) + c, it checks whether a + c or b + c is already computed by a dominating
|
||||
// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b +
|
||||
// c) + a respectively. To efficiently look up whether an expression is
|
||||
// computed before, we store each instruction seen and its SCEV into an
|
||||
// SCEV-to-instruction map.
|
||||
//
|
||||
// Although the algorithm pattern-matches only ternary additions, it
|
||||
// automatically handles many >3-ary expressions by walking through the function
|
||||
// in the depth-first order. For example, given
|
||||
//
|
||||
// (a + c) + d
|
||||
// ((a + b) + c) + d
|
||||
//
|
||||
// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
|
||||
// ((a + c) + b) + d into ((a + c) + d) + b.
|
||||
//
|
||||
// Limitations and TODO items:
|
||||
//
|
||||
// 1) We only consider n-ary adds for now. This should be extended and
|
||||
// generalized.
|
||||
//
|
||||
// 2) Besides arithmetic operations, similar reassociation can be applied to
|
||||
// GEPs. For example, if
|
||||
// X = &arr[a]
|
||||
// dominates
|
||||
// Y = &arr[a + b]
|
||||
// we may rewrite Y into X + b.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
using namespace llvm;
|
||||
using namespace PatternMatch;
|
||||
|
||||
#define DEBUG_TYPE "nary-reassociate"
|
||||
|
||||
namespace {
|
||||
class NaryReassociate : public FunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
NaryReassociate(): FunctionPass(ID) {
|
||||
initializeNaryReassociatePass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnFunction(Function &F) override;
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addPreserved<DominatorTreeWrapperPass>();
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
// TODO: can we preserve ScalarEvolution?
|
||||
AU.addRequired<ScalarEvolution>();
|
||||
AU.setPreservesCFG();
|
||||
}
|
||||
|
||||
private:
|
||||
// Reasssociates I to a better form.
|
||||
Instruction *tryReassociateAdd(Instruction *I);
|
||||
// A helper function for tryReassociateAdd. LHS and RHS are explicitly passed.
|
||||
Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I);
|
||||
// Rewrites I to LHS + RHS if LHS is computed already.
|
||||
Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I);
|
||||
|
||||
DominatorTree *DT;
|
||||
ScalarEvolution *SE;
|
||||
// A lookup table quickly telling which instructions compute the given SCEV.
|
||||
// Note that there can be multiple instructions at different locations
|
||||
// computing to the same SCEV. For example,
|
||||
// if (p1)
|
||||
// foo(a + b);
|
||||
// if (p2)
|
||||
// bar(a + b);
|
||||
DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs;
|
||||
};
|
||||
} // anonymous namespace
|
||||
|
||||
char NaryReassociate::ID = 0;
|
||||
INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
|
||||
false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
|
||||
INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
|
||||
false, false)
|
||||
|
||||
// Factory for the pass: allocates a new NaryReassociate and returns it
// through the FunctionPass base pointer.
FunctionPass *llvm::createNaryReassociatePass() {
  FunctionPass *Pass = new NaryReassociate();
  return Pass;
}
|
||||
|
||||
// Runs n-ary reassociation over F.
//
// Every add instruction is offered to tryReassociateAdd. When a rewrite is
// found, the original add is replaced by the new instruction and erased.
// Each add (original or rewritten) is then recorded in SeenExprs under its
// SCEV so that later instructions can reuse it.
//
// Returns true if at least one instruction was rewritten, and false if F was
// skipped by skipOptnoneFunction or nothing changed.
bool NaryReassociate::runOnFunction(Function &F) {
  if (skipOptnoneFunction(F))
    return false;

  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  SE = &getAnalysis<ScalarEvolution>();

  // Traverse the dominator tree in the depth-first order. This order makes sure
  // that every add dominating the instruction being processed has already been
  // recorded in SeenExprs.
  bool Changed = false;
  SeenExprs.clear();
  for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
       Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
    BasicBlock *BB = Node->getBlock();
    for (auto I = BB->begin(); I != BB->end(); ++I) {
      if (I->getOpcode() == Instruction::Add) {
        if (Instruction *NewI = tryReassociateAdd(I)) {
          // The IR is about to be modified; remember that so the return value
          // reports the change instead of always claiming "unmodified".
          Changed = true;
          I->replaceAllUsesWith(NewI);
          I->eraseFromParent();
          // Continue iterating from the replacement instruction.
          I = NewI;
        }
        // We should add the rewritten instruction because tryReassociateAdd may
        // have invalidated the original one.
        SeenExprs[SE->getSCEV(I)].push_back(I);
      }
    }
  }
  return Changed;
}
|
||||
|
||||
// Tries to reassociate the add I, treating first operand 0 and then operand 1
// as the nested "(A + B)" side. Returns the replacement instruction, or
// nullptr if neither operand order yields a rewrite.
Instruction *NaryReassociate::tryReassociateAdd(Instruction *I) {
  Value *Op0 = I->getOperand(0);
  Value *Op1 = I->getOperand(1);

  Instruction *NewI = tryReassociateAdd(Op0, Op1, I);
  if (!NewI)
    NewI = tryReassociateAdd(Op1, Op0, I);
  return NewI;
}
|
||||
|
||||
// Helper for tryReassociateAdd(Instruction *). Given I = LHS + RHS where LHS
// is itself an add (A + B) with a single use, tries to rewrite I as
// (A + RHS) + B and, failing that, as (B + RHS) + A. Returns the new
// instruction, or nullptr when LHS does not qualify or neither partial sum
// can be reused.
Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS,
                                                Instruction *I) {
  Value *Op0 = nullptr;
  Value *Op1 = nullptr;
  // To be conservative, only proceed when I is the sole user of LHS and LHS
  // is an add of Op0 and Op1.
  if (!LHS->hasOneUse() || !match(LHS, m_Add(m_Value(Op0), m_Value(Op1))))
    return nullptr;

  // I = (Op0 + Op1) + RHS
  //   = (Op0 + RHS) + Op1 or (Op1 + RHS) + Op0
  const SCEV *Op0Expr = SE->getSCEV(Op0);
  const SCEV *Op1Expr = SE->getSCEV(Op1);
  const SCEV *RHSExpr = SE->getSCEV(RHS);

  Instruction *NewI =
      tryReassociatedAdd(SE->getAddExpr(Op0Expr, RHSExpr), Op1, I);
  if (NewI == nullptr)
    NewI = tryReassociatedAdd(SE->getAddExpr(Op1Expr, RHSExpr), Op0, I);
  return NewI;
}
|
||||
|
||||
// Looks for a previously seen instruction that computes LHSExpr and
// dominates I. If one is found, creates "<that instruction> + RHS" (passing I
// as the final argument of CreateAdd), transfers I's name to it and returns
// it. Returns nullptr if LHSExpr was never seen or none of the examined
// candidates dominates I.
Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr,
                                                 Value *RHS, Instruction *I) {
  auto Entry = SeenExprs.find(LHSExpr);
  // Nothing recorded for this expression: no reuse is possible.
  if (Entry == SeenExprs.end())
    return nullptr;

  // Search at most 10 items to avoid running quadratically.
  static const unsigned MaxNumIterations = 10;

  auto &Candidates = Entry->second;
  auto Cand = Candidates.rbegin();
  // Walk from the back of the list, i.e. the most recently recorded
  // instruction first.
  for (unsigned Tried = 0;
       Cand != Candidates.rend() && Tried < MaxNumIterations;
       ++Cand, ++Tried) {
    if (!DT->dominates(*Cand, I))
      continue;
    Instruction *NewI = BinaryOperator::CreateAdd(*Cand, RHS, "", I);
    NewI->takeName(I);
    return NewI;
  }
  return nullptr;
}
|
@ -59,6 +59,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
|
||||
initializeLowerExpectIntrinsicPass(Registry);
|
||||
initializeMemCpyOptPass(Registry);
|
||||
initializeMergedLoadStoreMotionPass(Registry);
|
||||
initializeNaryReassociatePass(Registry);
|
||||
initializePartiallyInlineLibCallsPass(Registry);
|
||||
initializeReassociatePass(Registry);
|
||||
initializeRegToMemPass(Registry);
|
||||
|
127
test/Transforms/NaryReassociate/nary-add.ll
Normal file
127
test/Transforms/NaryReassociate/nary-add.ll
Normal file
@ -0,0 +1,127 @@
|
||||
; RUN: opt < %s -nary-reassociate -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
|
||||
declare void @foo(i32 %a)
|
||||
|
||||
; foo(a + c);
|
||||
; foo(a + (b + c));
|
||||
; =>
|
||||
; t = a + c;
|
||||
; foo(t);
|
||||
; foo(t + b);
|
||||
define void @left_reassociate(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: @left_reassociate(
|
||||
; %1 computes a + c; the pass records it so later adds can reuse it.
%1 = add i32 %a, %c
|
||||
; CHECK: [[BASE:%[a-zA-Z0-9]+]] = add i32 %a, %c
|
||||
call void @foo(i32 %1)
|
||||
; %2 (b + c) is used only by %3, which satisfies the pass's hasOneUse() guard.
%2 = add i32 %b, %c
|
||||
; The nested add is the right-hand operand here, so this exercises the
; swapped-operand attempt; %3 is expected to become (a + c) + b.
%3 = add i32 %a, %2
|
||||
; CHECK: add i32 [[BASE]], %b
|
||||
call void @foo(i32 %3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; foo(a + c);
|
||||
; foo((a + b) + c);
|
||||
; =>
|
||||
; t = a + c;
|
||||
; foo(t);
|
||||
; foo(t + b);
|
||||
define void @right_reassociate(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: @right_reassociate(
|
||||
; %1 computes a + c; the pass records it so later adds can reuse it.
%1 = add i32 %a, %c
|
||||
; CHECK: [[BASE:%[a-zA-Z0-9]+]] = add i32 %a, %c
|
||||
call void @foo(i32 %1)
|
||||
; %2 (a + b) is used only by %3, which satisfies the pass's hasOneUse() guard.
%2 = add i32 %a, %b
|
||||
; The nested add is the left-hand operand here; %3 is expected to become
; (a + c) + b.
%3 = add i32 %2, %c
|
||||
; CHECK: add i32 [[BASE]], %b
|
||||
call void @foo(i32 %3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; t1 = a + c;
|
||||
; foo(t1);
|
||||
; t2 = a + b;
|
||||
; foo(t2);
|
||||
; t3 = t2 + c;
|
||||
; foo(t3);
|
||||
;
|
||||
; Do not rewrite t3 into t1 + b because t2 is used elsewhere and is likely free.
|
||||
define void @no_reassociate(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: @no_reassociate(
|
||||
%1 = add i32 %a, %c
|
||||
; CHECK: add i32 %a, %c
|
||||
call void @foo(i32 %1)
|
||||
%2 = add i32 %a, %b
|
||||
; CHECK: add i32 %a, %b
|
||||
; This call is a second user of %2, so the pass's hasOneUse() guard fails
; for %2 and %3 below must be left untouched.
call void @foo(i32 %2)
|
||||
%3 = add i32 %2, %c
|
||||
; CHECK: add i32 %2, %c
|
||||
call void @foo(i32 %3)
|
||||
ret void
|
||||
}
|
||||
|
||||
; if (p1)
|
||||
; foo(a + c);
|
||||
; if (p2)
|
||||
; foo(a + c);
|
||||
; if (p3)
|
||||
; foo((a + b) + c);
|
||||
;
|
||||
; No action because (a + c) does not dominate ((a + b) + c).
|
||||
define void @conditional(i1 %p1, i1 %p2, i1 %p3, i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: @conditional(
|
||||
entry:
|
||||
br i1 %p1, label %then1, label %branch1
|
||||
|
||||
then1:
|
||||
; First a + c, computed only when %p1 is true.
%0 = add i32 %a, %c
|
||||
; CHECK: add i32 %a, %c
|
||||
call void @foo(i32 %0)
|
||||
br label %branch1
|
||||
|
||||
branch1:
|
||||
br i1 %p2, label %then2, label %branch2
|
||||
|
||||
then2:
|
||||
; Second a + c, computed only when %p2 is true.
%1 = add i32 %a, %c
|
||||
; CHECK: add i32 %a, %c
|
||||
call void @foo(i32 %1)
|
||||
br label %branch2
|
||||
|
||||
branch2:
|
||||
br i1 %p3, label %then3, label %return
|
||||
|
||||
then3:
|
||||
%2 = add i32 %a, %b
|
||||
; CHECK: %2 = add i32 %a, %b
|
||||
; Both earlier a + c instructions sit in conditionally executed blocks that
; do not dominate then3, so the pass's dominance test rejects them and %3
; must stay as written.
%3 = add i32 %2, %c
|
||||
; CHECK: add i32 %2, %c
|
||||
call void @foo(i32 %3)
|
||||
br label %return
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; foo((a + b) + c)
|
||||
; foo(((a + d) + b) + c)
|
||||
; =>
|
||||
; t = (a + b) + c;
|
||||
; foo(t);
|
||||
; foo(t + d);
|
||||
define void @quaternary(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @quaternary(
  %1 = add i32 %a, %b
  %2 = add i32 %1, %c
  call void @foo(i32 %2)
; TMP1 captures the value holding (a + b) + c. The capture pattern uses "+"
; so that value names longer than one character still match, consistent with
; the BASE pattern used by the other tests in this file.
; CHECK: call void @foo(i32 [[TMP1:%[a-zA-Z0-9]+]])
  %3 = add i32 %a, %d
  %4 = add i32 %3, %b
  %5 = add i32 %4, %c
; After two rounds of reassociation the final sum is expected to be
; ((a + b) + c) + d, i.e. TMP1 + d.
; CHECK: [[TMP2:%[a-zA-Z0-9]+]] = add i32 [[TMP1]], %d
  call void @foo(i32 %5)
; The closing parenthesis anchors the match to the complete argument.
; CHECK: call void @foo(i32 [[TMP2]])
  ret void
}
|
Loading…
Reference in New Issue
Block a user