Completely rework the datastructure GVN uses to represent the value number to leader mapping. Previously,

this was a tree of hashtables, and a query recursed into the table for the immediate dominator ad infinitum
if the initial lookup failed.  This led to really bad performance on tall, narrow CFGs.

We can instead replace it with what is conceptually a multimap of value numbers to leaders (actually
represented by a hashtable with a list of Value*'s as the value type), and then
determine which leader from that set to use very cheaply thanks to the DFS numberings maintained by
DominatorTree.  Because there are typically few duplicates of a given value, this scan tends to be
quite fast.  Additionally, we use a custom linked list and BumpPtr allocation to avoid any unnecessary
allocation in representing the value-side of the multimap.

This change brings with it a 15% (!) improvement in the total running time of GVN on 403.gcc, which I
think is pretty good considering that includes all the "real work" being done by MemDep as well.

The one downside to this approach is that we can no longer use GVN to perform simple conditional progation,
but that seems like an acceptable loss since we now have LVI and CorrelatedValuePropagation to pick up
the slack.  If you see conditional propagation that's not happening, please file bugs against LVI or CVP.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119714 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Owen Anderson 2010-11-18 18:32:40 +00:00
parent 056ab107ff
commit a04a0649e1
2 changed files with 86 additions and 124 deletions

View File

@ -40,6 +40,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@ -51,6 +52,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <list>
using namespace llvm;
STATISTIC(NumGVNInstr, "Number of instructions deleted");
@ -674,7 +676,45 @@ namespace {
const TargetData* TD;
ValueTable VN;
DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
DenseMap<uint32_t, std::pair<Value*, void*> > NumberTable;
BumpPtrAllocator TableAllocator;
void insert_table(uint32_t N, Value *V) {
std::pair<Value*, void*>& Curr = NumberTable[N];
if (!Curr.first) {
Curr.first = V;
return;
}
std::pair<Value*, void*>* Node =
TableAllocator.Allocate<std::pair<Value*, void*> >();
Node->first = V;
Node->second = Curr.second;
Curr.second = Node;
}
void erase_table(uint32_t N, Value *V) {
std::pair<Value*, void*>* Prev = 0;
std::pair<Value*, void*>* Curr = &NumberTable[N];
while (Curr->first != V) {
Prev = Curr;
Curr = static_cast<std::pair<Value*, void*>*>(Curr->second);
}
if (Prev) {
Prev->second = Curr->second;
} else {
if (!Curr->second) {
Curr->first = 0;
} else {
std::pair<Value*, void*>* Next =
static_cast<std::pair<Value*, void*>*>(Curr->second);
Curr->first = Next->first;
Curr->second = Next->second;
}
}
}
// List of critical edges to be split between iterations.
SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
@ -1847,16 +1887,25 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
}
Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
if (I == localAvail.end())
return 0;
ValueNumberScope *Locals = I->second;
while (Locals) {
DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
if (I != Locals->table.end())
return I->second;
Locals = Locals->parent;
std::pair<Value*, void*> Vals = NumberTable[num];
if (!Vals.first) return 0;
Instruction *Inst = dyn_cast<Instruction>(Vals.first);
if (!Inst) return Vals.first;
BasicBlock *Parent = Inst->getParent();
if (DT->dominates(Parent, BB))
return Inst;
std::pair<Value*, void*>* Next =
static_cast<std::pair<Value*, void*>*>(Vals.second);
while (Next) {
Instruction *CurrInst = dyn_cast<Instruction>(Next->first);
if (!CurrInst) return Next->first;
BasicBlock *Parent = CurrInst->getParent();
if (DT->dominates(Parent, BB))
return CurrInst;
Next = static_cast<std::pair<Value*, void*>*>(Next->second);
}
return 0;
@ -1889,7 +1938,7 @@ bool GVN::processInstruction(Instruction *I,
if (!Changed) {
unsigned Num = VN.lookup_or_add(LI);
localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
insert_table(Num, LI);
}
return Changed;
@ -1898,41 +1947,21 @@ bool GVN::processInstruction(Instruction *I,
uint32_t NextNum = VN.getNextUnusedValueNumber();
unsigned Num = VN.lookup_or_add(I);
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
return false;
Value *BranchCond = BI->getCondition();
uint32_t CondVN = VN.lookup_or_add(BranchCond);
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
if (TrueSucc->getSinglePredecessor())
localAvail[TrueSucc]->table[CondVN] =
ConstantInt::getTrue(TrueSucc->getContext());
if (FalseSucc->getSinglePredecessor())
localAvail[FalseSucc]->table[CondVN] =
ConstantInt::getFalse(TrueSucc->getContext());
return false;
// Allocations are always uniquely numbered, so we can save time and memory
// by fast failing them.
} else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
insert_table(Num, I);
return false;
}
if (isa<PHINode>(I)) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
insert_table(Num, I);
// If the number we were assigned was a brand new VN, then we don't
// need to do a lookup to see if the number already exists
// somewhere in the domtree: it can't!
} else if (Num == NextNum) {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
insert_table(Num, I);
// Perform fast-path value-number based elimination of values inherited from
// dominators.
@ -1946,7 +1975,7 @@ bool GVN::processInstruction(Instruction *I,
return true;
} else {
localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
insert_table(Num, I);
}
return false;
@ -2095,20 +2124,19 @@ bool GVN::performPRE(Function &F) {
if (P == CurrentBlock) {
NumWithout = 2;
break;
} else if (!localAvail.count(P)) {
} else if (!DT->dominates(&F.getEntryBlock(), P)) {
NumWithout = 2;
break;
}
DenseMap<uint32_t, Value*>::iterator predV =
localAvail[P]->table.find(ValNo);
if (predV == localAvail[P]->table.end()) {
Value* predV = lookupNumber(P, ValNo);
if (predV == 0) {
PREPred = P;
++NumWithout;
} else if (predV->second == CurInst) {
} else if (predV == CurInst) {
NumWithout = 2;
} else {
predMap[P] = predV->second;
predMap[P] = predV;
++NumWith;
}
}
@ -2167,7 +2195,7 @@ bool GVN::performPRE(Function &F) {
++NumGVNPRE;
// Update the availability map to include the new instruction.
localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
insert_table(ValNo, PREInstr);
// Create a PHI to make the value available in this block.
PHINode* Phi = PHINode::Create(CurInst->getType(),
@ -2180,12 +2208,13 @@ bool GVN::performPRE(Function &F) {
}
VN.add(Phi, ValNo);
localAvail[CurrentBlock]->table[ValNo] = Phi;
insert_table(ValNo, Phi);
CurInst->replaceAllUsesWith(Phi);
if (MD && Phi->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(Phi);
VN.erase(CurInst);
erase_table(ValNo, CurInst);
DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
if (MD) MD->removeInstruction(CurInst);
@ -2217,16 +2246,7 @@ bool GVN::splitCriticalEdges() {
/// iterateOnFunction - Executes one iteration of GVN
bool GVN::iterateOnFunction(Function &F) {
cleanupGlobalSets();
for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
if (DI->getIDom())
localAvail[DI->getBlock()] =
new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
else
localAvail[DI->getBlock()] = new ValueNumberScope(0);
}
// Top-down walk of the dominator tree
bool Changed = false;
#if 0
@ -2246,11 +2266,8 @@ bool GVN::iterateOnFunction(Function &F) {
void GVN::cleanupGlobalSets() {
VN.clear();
for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
delete I->second;
localAvail.clear();
NumberTable.clear();
TableAllocator.Reset();
}
/// verifyRemoved - Verify that the specified instruction does not occur in our
@ -2260,17 +2277,14 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
// Walk through the value number scope to make sure the instruction isn't
// ferreted away in it.
for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
const ValueNumberScope *VNS = I->second;
while (VNS) {
for (DenseMap<uint32_t, Value*>::const_iterator
II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
assert(II->second != Inst && "Inst still in value numbering scope!");
}
VNS = VNS->parent;
for (DenseMap<uint32_t, std::pair<Value*, void*> >::const_iterator
I = NumberTable.begin(), E = NumberTable.end(); I != E; ++I) {
std::pair<Value*, void*> const * Node = &I->second;
assert(Node->first != Inst && "Inst still in value numbering scope!");
while (Node->second) {
Node = static_cast<std::pair<Value*, void*>*>(Node->second);
assert(Node->first != Inst && "Inst still in value numbering scope!");
}
}
}

View File

@ -1,52 +0,0 @@
; RUN: opt < %s -basicaa -gvn -S | grep {br i1 false}
@a = external global i32 ; <i32*> [#uses=7]
define i32 @foo() nounwind {
entry:
%0 = load i32* @a, align 4 ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 4 ; <i1> [#uses=1]
br i1 %1, label %bb, label %bb1
bb: ; preds = %entry
br label %bb8
bb1: ; preds = %entry
%2 = load i32* @a, align 4 ; <i32> [#uses=1]
%3 = icmp eq i32 %2, 5 ; <i1> [#uses=1]
br i1 %3, label %bb2, label %bb3
bb2: ; preds = %bb1
br label %bb8
bb3: ; preds = %bb1
%4 = load i32* @a, align 4 ; <i32> [#uses=1]
%5 = icmp eq i32 %4, 4 ; <i1> [#uses=1]
br i1 %5, label %bb4, label %bb5
bb4: ; preds = %bb3
%6 = load i32* @a, align 4 ; <i32> [#uses=1]
%7 = add i32 %6, 5 ; <i32> [#uses=1]
br label %bb8
bb5: ; preds = %bb3
%8 = load i32* @a, align 4 ; <i32> [#uses=1]
%9 = icmp eq i32 %8, 5 ; <i1> [#uses=1]
br i1 %9, label %bb6, label %bb7
bb6: ; preds = %bb5
%10 = load i32* @a, align 4 ; <i32> [#uses=1]
%11 = add i32 %10, 4 ; <i32> [#uses=1]
br label %bb8
bb7: ; preds = %bb5
%12 = load i32* @a, align 4 ; <i32> [#uses=1]
br label %bb8
bb8: ; preds = %bb7, %bb6, %bb4, %bb2, %bb
%.0 = phi i32 [ %12, %bb7 ], [ %11, %bb6 ], [ %7, %bb4 ], [ 4, %bb2 ], [ 5, %bb ] ; <i32> [#uses=1]
br label %return
return: ; preds = %bb8
ret i32 %.0
}