diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index 453740517bd..c9a142e7b80 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -36,7 +36,7 @@ namespace llvm { /// Invalid - Clients of MemDep never see this. Invalid = 0, /// Normal - This is a normal instruction dependence. The pointer member - /// of the DepResultTy pair holds the instruction. + /// of the MemDepResult pair holds the instruction. Normal, /// NonLocal - This marker indicates that the query has no dependency in @@ -85,8 +85,10 @@ namespace llvm { /// is depended on. Otherwise, return null. Instruction *getInst() const { return Value.getPointer(); } - bool operator==(const MemDepResult &M) { return M.Value == Value; } - bool operator!=(const MemDepResult &M) { return M.Value != Value; } + bool operator==(const MemDepResult &M) const { return M.Value == Value; } + bool operator!=(const MemDepResult &M) const { return M.Value != Value; } + bool operator<(const MemDepResult &M) const { return M.Value < Value; } + bool operator>(const MemDepResult &M) const { return M.Value > Value; } private: friend class MemoryDependenceAnalysis; /// Dirty - Entries with this marker occur in a LocalDeps map or @@ -95,14 +97,14 @@ namespace llvm { /// instruction pointer. If so, the pointer is an instruction in the /// block where scanning can start from, saving some work. /// - /// In a default-constructed DepResultTy object, the type will be Dirty + /// In a default-constructed MemDepResult object, the type will be Dirty /// and the instruction pointer will be null. /// - + /// isDirty - Return true if this is a MemDepResult in its dirty/invalid. /// state. bool isDirty() const { return Value.getInt() == Invalid; } - + static MemDepResult getDirty(Instruction *Inst) { return MemDepResult(PairTy(Inst, Invalid)); } @@ -128,12 +130,15 @@ namespace llvm { typedef DenseMap LocalDepMapType; LocalDepMapType LocalDeps; - typedef DenseMap NonLocalDepInfo; + public: + typedef std::pair NonLocalDepEntry; + typedef std::vector NonLocalDepInfo; + private: /// PerInstNLInfo - This is the instruction we keep for each cached access /// that we have for an instruction. The pointer is an owning pointer and /// the bool indicates whether we have any dirty bits in the set. - typedef PointerIntPair PerInstNLInfo; + typedef std::pair PerInstNLInfo; // A map from instructions to their non-local dependencies. typedef DenseMap NonLocalDepMapType; @@ -162,9 +167,7 @@ namespace llvm { /// Clean up memory in between runs void releaseMemory() { LocalDeps.clear(); - for (NonLocalDepMapType::iterator I = NonLocalDeps.begin(), - E = NonLocalDeps.end(); I != E; ++I) - delete I->second.getPointer(); + NonLocalDeps.clear(); NonLocalDeps.clear(); ReverseLocalDeps.clear(); ReverseNonLocalDeps.clear(); @@ -194,11 +197,14 @@ namespace llvm { /// potentially live across. The returned set of results will include a /// "NonLocal" result for all blocks where the value is live across. /// - /// This method assumes the instruction returns a "nonlocal" dependency + /// This method assumes the instruction returns a "NonLocal" dependency /// within its own block. - void getNonLocalDependency(Instruction *QueryInst, - SmallVectorImpl > &Result); + /// + /// This returns a reference to an internal data structure that may be + /// invalidated on the next non-local query or when an instruction is + /// removed. Clients must copy this data if they want it around longer than + /// that. + const NonLocalDepInfo &getNonLocalDependency(Instruction *QueryInst); /// removeInstruction - Remove an instruction from the dependence analysis, /// updating the dependence of instructions that previously depended on it. diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 14646adf7b5..c47ec0493a5 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -28,9 +28,9 @@ #include "llvm/Target/TargetData.h" using namespace llvm; -STATISTIC(NumCacheNonLocal, "Number of cached non-local responses"); +STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses"); +STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses"); STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses"); - char MemoryDependenceAnalysis::ID = 0; // Register this pass... @@ -51,10 +51,12 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) { return false; } + /// getCallSiteDependency - Private helper for finding the local dependencies /// of a call site. MemDepResult MemoryDependenceAnalysis:: getCallSiteDependency(CallSite C, BasicBlock::iterator ScanIt, BasicBlock *BB) { + // Walk backwards through the block, looking for dependencies while (ScanIt != BB->begin()) { Instruction *Inst = --ScanIt; @@ -224,16 +226,13 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { /// This method assumes the instruction returns a "nonlocal" dependency /// within its own block. /// -void MemoryDependenceAnalysis:: -getNonLocalDependency(Instruction *QueryInst, - SmallVectorImpl > &Result) { +const MemoryDependenceAnalysis::NonLocalDepInfo & +MemoryDependenceAnalysis::getNonLocalDependency(Instruction *QueryInst) { assert(getDependency(QueryInst).isNonLocal() && "getNonLocalDependency should only be used on insts with non-local deps!"); PerInstNLInfo &CacheP = NonLocalDeps[QueryInst]; - if (CacheP.getPointer() == 0) CacheP.setPointer(new NonLocalDepInfo()); - NonLocalDepInfo &Cache = *CacheP.getPointer(); + NonLocalDepInfo &Cache = CacheP.first; /// DirtyBlocks - This is the set of blocks that need to be recomputed. In /// the cached case, this can happen due to instructions being deleted etc. In @@ -242,17 +241,24 @@ getNonLocalDependency(Instruction *QueryInst, SmallVector DirtyBlocks; if (!Cache.empty()) { + // Okay, we have a cache entry. If we know it is not dirty, just return it + // with no computation. + if (!CacheP.second) { + NumCacheNonLocal++; + return Cache; + } + // If we already have a partially computed set of results, scan them to - // determine what is dirty, seeding our initial DirtyBlocks worklist. The - // Int bit of CacheP tells us if we have anything dirty. - if (CacheP.getInt()) - for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end(); - I != E; ++I) - if (I->second.isDirty()) - DirtyBlocks.push_back(I->first); + // determine what is dirty, seeding our initial DirtyBlocks worklist. + for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end(); + I != E; ++I) + if (I->second.isDirty()) + DirtyBlocks.push_back(I->first); - NumCacheNonLocal++; + // Sort the cache so that we can do fast binary search lookups below. + std::sort(Cache.begin(), Cache.end()); + ++NumCacheDirtyNonLocal; //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: " // << Cache.size() << " cached: " << *QueryInst; } else { @@ -262,52 +268,80 @@ getNonLocalDependency(Instruction *QueryInst, NumUncacheNonLocal++; } + // Visited checked first, vector in sorted order. + SmallPtrSet Visited; + + unsigned NumSortedEntries = Cache.size(); + // Iterate while we still have blocks to update. while (!DirtyBlocks.empty()) { BasicBlock *DirtyBB = DirtyBlocks.back(); DirtyBlocks.pop_back(); - // Get the entry for this block. Note that this relies on MemDepResult - // default initializing to Dirty. - MemDepResult &DirtyBBEntry = Cache[DirtyBB]; + // Already processed this block? + if (!Visited.insert(DirtyBB)) + continue; + + // Do a binary search to see if we already have an entry for this block in + // the cache set. If so, find it. + NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries, + std::make_pair(DirtyBB, MemDepResult())); + if (Entry != Cache.begin() && (&*Entry)[-1].first == DirtyBB) + --Entry; + + MemDepResult *ExistingResult = 0; + if (Entry != Cache.begin()+NumSortedEntries && + Entry->first == DirtyBB) { + // If we already have an entry, and if it isn't already dirty, the block + // is done. + if (!Entry->second.isDirty()) + continue; + + // Otherwise, remember this slot so we can update the value. + ExistingResult = &Entry->second; + } - // If DirtyBBEntry isn't dirty, it ended up on the worklist multiple times. - if (!DirtyBBEntry.isDirty()) continue; - // If the dirty entry has a pointer, start scanning from it so we don't have // to rescan the entire block. BasicBlock::iterator ScanPos = DirtyBB->end(); - if (Instruction *Inst = DirtyBBEntry.getInst()) { - ScanPos = Inst; + if (ExistingResult) { + if (Instruction *Inst = ExistingResult->getInst()) { + ScanPos = Inst; - // We're removing QueryInst's dependence on Inst. - SmallPtrSet &InstMap = ReverseNonLocalDeps[Inst]; - InstMap.erase(QueryInst); - if (InstMap.empty()) ReverseNonLocalDeps.erase(Inst); + // We're removing QueryInst's use of Inst. + SmallPtrSet &InstMap = ReverseNonLocalDeps[Inst]; + InstMap.erase(QueryInst); + if (InstMap.empty()) ReverseNonLocalDeps.erase(Inst); + } } // Find out if this block has a local dependency for QueryInst. - DirtyBBEntry = getDependencyFrom(QueryInst, ScanPos, DirtyBB); - + MemDepResult Dep = getDependencyFrom(QueryInst, ScanPos, DirtyBB); + + // If we had a dirty entry for the block, update it. Otherwise, just add + // a new entry. + if (ExistingResult) + *ExistingResult = Dep; + else + Cache.push_back(std::make_pair(DirtyBB, Dep)); + // If the block has a dependency (i.e. it isn't completely transparent to - // the value), remember it! - if (!DirtyBBEntry.isNonLocal()) { + // the value), remember the association! + if (!Dep.isNonLocal()) { // Keep the ReverseNonLocalDeps map up to date so we can efficiently // update this when we remove instructions. - if (Instruction *Inst = DirtyBBEntry.getInst()) + if (Instruction *Inst = Dep.getInst()) ReverseNonLocalDeps[Inst].insert(QueryInst); - continue; - } + } else { - // If the block *is* completely transparent to the load, we need to check - // the predecessors of this block. Add them to our worklist. - DirtyBlocks.append(pred_begin(DirtyBB), pred_end(DirtyBB)); + // If the block *is* completely transparent to the load, we need to check + // the predecessors of this block. Add them to our worklist. + DirtyBlocks.append(pred_begin(DirtyBB), pred_end(DirtyBB)); + } } - - // Copy the result into the output set. - for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end(); I != E;++I) - Result.push_back(std::make_pair(I->first, I->second)); + return Cache; } /// removeInstruction - Remove an instruction from the dependence analysis, @@ -318,12 +352,11 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { // for any cached queries. NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst); if (NLDI != NonLocalDeps.end()) { - NonLocalDepInfo &BlockMap = *NLDI->second.getPointer(); + NonLocalDepInfo &BlockMap = NLDI->second.first; for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end(); DI != DE; ++DI) if (Instruction *Inst = DI->second.getInst()) ReverseNonLocalDeps[Inst].erase(RemInst); - delete &BlockMap; NonLocalDeps.erase(NLDI); } @@ -392,12 +425,11 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { assert(*I != RemInst && "Already removed NonLocalDep info for RemInst"); PerInstNLInfo &INLD = NonLocalDeps[*I]; - assert(INLD.getPointer() != 0 && "Reverse mapping out of date?"); // The information is now dirty! - INLD.setInt(true); + INLD.second = true; - for (NonLocalDepInfo::iterator DI = INLD.getPointer()->begin(), - DE = INLD.getPointer()->end(); DI != DE; ++DI) { + for (NonLocalDepInfo::iterator DI = INLD.first.begin(), + DE = INLD.first.end(); DI != DE; ++DI) { if (DI->second.getInst() != RemInst) continue; // Convert to a dirty entry for the subsequent instruction. @@ -439,8 +471,8 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { E = NonLocalDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in data structures"); const PerInstNLInfo &INLD = I->second; - for (NonLocalDepInfo::iterator II = INLD.getPointer()->begin(), - EE = INLD.getPointer()->end(); II != EE; ++II) + for (NonLocalDepInfo::const_iterator II = INLD.first.begin(), + EE = INLD.first.end(); II != EE; ++II) assert(II->second.getInst() != D && "Inst occurs in data structures"); } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index a7b38056ad1..292a97e392a 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -497,15 +497,15 @@ uint32_t ValueTable::lookup_or_add(Value* V) { return v; } - - SmallVector, 32> deps; - MD->getNonLocalDependency(C, deps); + + const MemoryDependenceAnalysis::NonLocalDepInfo &deps = + MD->getNonLocalDependency(C); CallInst* cdep = 0; // Check to see if we have a single dominating call instruction that is // identical to C. - for (SmallVector, 32> - ::iterator I = deps.begin(), E = deps.end(); I != E; ++I) { + for (unsigned i = 0, e = deps.size(); i != e; ++i) { + const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; // Ignore non-local dependencies. if (I->second.isNonLocal()) continue; @@ -868,11 +868,18 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, LoadInst* orig, bool GVN::processNonLocalLoad(LoadInst* L, SmallVectorImpl &toErase) { // Find the non-local dependencies of the load - SmallVector, 32> deps; - MD->getNonLocalDependency(L, deps); - + const MemoryDependenceAnalysis::NonLocalDepInfo &deps = + MD->getNonLocalDependency(L); DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << deps.size() << *L); - +#if 0 + DEBUG(for (unsigned i = 0, e = deps.size(); i != e; ++i) { + cerr << " " << deps[i].first->getName(); + if (Instruction *I = deps[i].second.getInst()) + cerr << *I; + else + cerr << "\n"; + }); +#endif // If we had to process more than one hundred blocks to find the // dependencies, this load isn't worth worrying about. Optimizing @@ -885,31 +892,36 @@ bool GVN::processNonLocalLoad(LoadInst* L, DenseMap repl; // Filter out useless results (non-locals, etc) - for (SmallVector, 32>::iterator - I = deps.begin(), E = deps.end(); I != E; ++I) { - if (I->second.isNone()) { - repl[I->first] = UndefValue::get(L->getType()); - continue; - } - - if (I->second.isNonLocal()) { + for (unsigned i = 0, e = deps.size(); i != e; ++i) { + BasicBlock *DepBB = deps[i].first; + MemDepResult DepInfo = deps[i].second; + + if (DepInfo.isNonLocal()) { // If this is a non-local dependency in the entry block, then we depend on // the value live-in at the start of the function. We could insert a load // in the entry block to get this, but for now we'll just bail out. + // // FIXME: Consider emitting a load in the entry block to catch this case! - if (I->first == EntryBlock) + // Tricky part is to sink so that it doesn't execute in places where it + // isn't needed. + if (DepBB == EntryBlock) return false; continue; } + + if (DepInfo.isNone()) { + repl[DepBB] = UndefValue::get(L->getType()); + continue; + } - if (StoreInst* S = dyn_cast(I->second.getInst())) { + if (StoreInst* S = dyn_cast(DepInfo.getInst())) { if (S->getPointerOperand() != L->getPointerOperand()) return false; - repl[I->first] = S->getOperand(0); - } else if (LoadInst* LD = dyn_cast(I->second.getInst())) { + repl[DepBB] = S->getOperand(0); + } else if (LoadInst* LD = dyn_cast(DepInfo.getInst())) { if (LD->getPointerOperand() != L->getPointerOperand()) return false; - repl[I->first] = LD; + repl[DepBB] = LD; } else { return false; }