diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index fef16f6a266..2c1ad10517b 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -163,7 +163,6 @@ namespace { /// Allocas - The alloca instructions being promoted. /// std::vector Allocas; - SmallVector &RetryList; DominatorTree &DT; DominanceFrontier &DF; @@ -200,10 +199,9 @@ namespace { /// BBNumPreds - Lazily compute the number of predecessors a block has. DenseMap BBNumPreds; public: - PromoteMem2Reg(const std::vector &A, - SmallVector &Retry, DominatorTree &dt, + PromoteMem2Reg(const std::vector &A, DominatorTree &dt, DominanceFrontier &df, AliasSetTracker *ast) - : Allocas(A), RetryList(Retry), DT(dt), DF(df), AST(ast) {} + : Allocas(A), DT(dt), DF(df), AST(ast) {} void run(); @@ -243,13 +241,10 @@ namespace { void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI); - - bool PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI, + void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI); - void PromoteLocallyUsedAllocas(BasicBlock *BB, - const std::vector &AIs, - LargeBlockInfo &LBI); + void RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncVals, std::vector &Worklist); @@ -315,13 +310,6 @@ namespace { void PromoteMem2Reg::run() { Function &F = *DF.getRoot()->getParent(); - // LocallyUsedAllocas - Keep track of all of the alloca instructions which are - // only used in a single basic block. These instructions can be efficiently - // promoted by performing a single linear scan over that one block. Since - // individual basic blocks are sometimes large, we group together all allocas - // that are live in a single basic block by the basic block they are live in. - std::map > LocallyUsedAllocas; - if (AST) PointerAllocaValues.resize(Allocas.size()); AllocaInfo Info; @@ -376,11 +364,29 @@ void PromoteMem2Reg::run() { // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. if (Info.OnlyUsedInOneBlock) { - LocallyUsedAllocas[Info.OnlyBlock].push_back(AI); + PromoteSingleBlockAlloca(AI, Info, LBI); - // Remove the alloca from the Allocas list, since it will be processed. - RemoveFromAllocasList(AllocaNum); - continue; + // Finally, after the scan, check to see if the stores are all that is + // left. + if (Info.UsingBlocks.empty()) { + + // Remove the (now dead) stores and alloca. + while (!AI->use_empty()) { + StoreInst *SI = cast(AI->use_back()); + SI->eraseFromParent(); + LBI.deleteValue(SI); + } + + if (AST) AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + + ++NumLocalPromoted; + continue; + } } // If we haven't computed a numbering for the BB's in the function, do so @@ -406,26 +412,6 @@ void PromoteMem2Reg::run() { DetermineInsertionPoint(AI, AllocaNum, Info); } - // Process all allocas which are only used in a single basic block. - for (std::map >::iterator I = - LocallyUsedAllocas.begin(), E = LocallyUsedAllocas.end(); I != E; ++I){ - const std::vector &LocAllocas = I->second; - assert(!LocAllocas.empty() && "empty alloca list??"); - - // It's common for there to only be one alloca in the list. Handle it - // efficiently. - if (LocAllocas.size() == 1) { - // If we can do the quick promotion pass, do so now. - if (PromoteLocallyUsedAlloca(I->first, LocAllocas[0], LBI)) - RetryList.push_back(LocAllocas[0]); // Failed, retry later. - } else { - // Locally promote anything possible. Note that if this is unable to - // promote a particular alloca, it puts the alloca onto the Allocas vector - // for global processing. - PromoteLocallyUsedAllocas(I->first, LocAllocas, LBI); - } - } - if (Allocas.empty()) return; // All of the allocas must have been trivial! @@ -752,7 +738,16 @@ void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, } -/// PromoteLocallyUsedAlloca - Many allocas are only used within a single basic +/// StoreIndexSearchPredicate - This is a helper predicate used to search by the +/// first element of a pair. +struct StoreIndexSearchPredicate { + bool operator()(const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; + } +}; + +/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic /// block. If this is the case, avoid traversing the CFG and inserting a lot of /// potentially useless PHI nodes by just performing a single linear pass over /// the basic block using the Alloca. @@ -766,126 +761,74 @@ void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, /// /// ... so long as A is not used before undef is set. /// -bool PromoteMem2Reg::PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI, +void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI) { - assert(!AI->use_empty() && "There are no uses of the alloca!"); + // The trickiest case to handle is when we have large blocks. Because of this, + // this code is optimized assuming that large blocks happen. This does not + // significantly pessimize the small block case. This uses LargeBlockInfo to + // make it efficient to get the index of various operations in the block. + + // Clear out UsingBlocks. We will reconstruct it here if needed. + Info.UsingBlocks.clear(); + + // Walk the use-def list of the alloca, getting the locations of all stores. + typedef SmallVector, 64> StoresByIndexTy; + StoresByIndexTy StoresByIndex; + + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E; ++UI) + if (StoreInst *SI = dyn_cast(*UI)) + StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); - // Handle degenerate cases quickly. - if (AI->hasOneUse()) { - Instruction *U = cast(AI->use_back()); - if (LoadInst *LI = dyn_cast(U)) { - // Must be a load of uninitialized value. - LI->replaceAllUsesWith(UndefValue::get(AI->getAllocatedType())); - if (AST && isa(LI->getType())) - AST->deleteValue(LI); - } else { - // Otherwise it must be a store which is never read. - assert(isa(U)); - } - LBI.deleteValue(U); - BB->getInstList().erase(U); - } else { - // Uses of the uninitialized memory location shall get undef. - Value *CurVal = 0; - - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Inst = I++; - if (LoadInst *LI = dyn_cast(Inst)) { - if (LI->getOperand(0) == AI) { - if (!CurVal) return true; // Could not locally promote! - - // Loads just returns the "current value"... - LI->replaceAllUsesWith(CurVal); - if (AST && isa(LI->getType())) - AST->deleteValue(LI); - BB->getInstList().erase(LI); - LBI.deleteValue(LI); - } - } else if (StoreInst *SI = dyn_cast(Inst)) { - if (SI->getOperand(1) == AI) { - // Store updates the "current value"... - CurVal = SI->getOperand(0); - BB->getInstList().erase(SI); - LBI.deleteValue(SI); - } + // If there are no stores to the alloca, just replace any loads with undef. + if (StoresByIndex.empty()) { + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) + if (LoadInst *LI = dyn_cast(*UI++)) { + LI->replaceAllUsesWith(UndefValue::get(LI->getType())); + if (AST && isa(LI->getType())) + AST->deleteValue(LI); + LBI.deleteValue(LI); + LI->eraseFromParent(); } - } - } - - // After traversing the basic block, there should be no more uses of the - // alloca: remove it now. - assert(AI->use_empty() && "Uses of alloca from more than one BB??"); - if (AST) AST->deleteValue(AI); - AI->eraseFromParent(); - LBI.deleteValue(AI); - - ++NumLocalPromoted; - return false; -} - -/// PromoteLocallyUsedAllocas - This method is just like -/// PromoteLocallyUsedAlloca, except that it processes multiple alloca -/// instructions in parallel. This is important in cases where we have large -/// basic blocks, as we don't want to rescan the entire basic block for each -/// alloca which is locally used in it (which might be a lot). -void PromoteMem2Reg:: -PromoteLocallyUsedAllocas(BasicBlock *BB, const std::vector &AIs, - LargeBlockInfo &LBI) { - DenseMap CurValues; - for (unsigned i = 0, e = AIs.size(); i != e; ++i) - CurValues[AIs[i]] = 0; // Insert with null value - - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Inst = I++; - if (LoadInst *LI = dyn_cast(Inst)) { - // Is this a load of an alloca we are tracking? - if (AllocaInst *AI = dyn_cast(LI->getOperand(0))) { - DenseMap::iterator AIt = CurValues.find(AI); - if (AIt != CurValues.end()) { - // If loading an uninitialized value, allow the inter-block case to - // handle it. Due to control flow, this might actually be ok. - if (AIt->second == 0) { // Use of locally uninitialized value?? - RetryList.push_back(AI); // Retry elsewhere. - CurValues.erase(AIt); // Stop tracking this here. - if (CurValues.empty()) return; - } else { - // Loads just returns the "current value"... - LI->replaceAllUsesWith(AIt->second); - if (AST && isa(LI->getType())) - AST->deleteValue(LI); - BB->getInstList().erase(LI); - LBI.deleteValue(LI); - } - } - } - } else if (StoreInst *SI = dyn_cast(Inst)) { - if (AllocaInst *AI = dyn_cast(SI->getOperand(1))) { - DenseMap::iterator AIt = CurValues.find(AI); - if (AIt != CurValues.end()) { - // Store updates the "current value"... - AIt->second = SI->getOperand(0); - SI->eraseFromParent(); - LBI.deleteValue(SI); - } - } - } + return; } - // At the end of the block scan, all allocas in CurValues are dead. - for (DenseMap::iterator I = CurValues.begin(), - E = CurValues.end(); I != E; ++I) { - AllocaInst *AI = I->first; - assert(AI->use_empty() && "Uses of alloca from more than one BB??"); - if (AST) AST->deleteValue(AI); - AI->eraseFromParent(); - LBI.deleteValue(AI); + // Sort the stores by their index, making it efficient to do a lookup with a + // binary search. + std::sort(StoresByIndex.begin(), StoresByIndex.end()); + + // Walk all of the loads from this alloca, replacing them with the nearest + // store above them, if any. + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { + LoadInst *LI = dyn_cast(*UI++); + if (!LI) continue; + + unsigned LoadIdx = LBI.getInstructionIndex(LI); + + // Find the nearest store that has a lower than this load. + StoresByIndexTy::iterator I = + std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), + std::pair(LoadIdx, 0), + StoreIndexSearchPredicate()); + + // If there is no store before this load, then we can't promote this load. + if (I == StoresByIndex.begin()) { + // Can't handle this load, bail out. + Info.UsingBlocks.push_back(LI->getParent()); + continue; + } + + // Otherwise, there was a store before this load, the load takes its value. + --I; + LI->replaceAllUsesWith(I->second->getOperand(0)); + if (AST && isa(LI->getType())) + AST->deleteValue(LI); + LI->eraseFromParent(); + LBI.deleteValue(LI); } - - NumLocalPromoted += CurValues.size(); } - // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific // Alloca returns true if there wasn't already a phi-node for that variable // @@ -1044,26 +987,5 @@ void llvm::PromoteMemToReg(const std::vector &Allocas, // If there is nothing to do, bail out... if (Allocas.empty()) return; - SmallVector RetryList; - PromoteMem2Reg(Allocas, RetryList, DT, DF, AST).run(); - - // PromoteMem2Reg may not have been able to promote all of the allocas in one - // pass, run it again if needed. - std::vector NewAllocas; - while (!RetryList.empty()) { - // If we need to retry some allocas, this is due to there being no store - // before a read in a local block. To counteract this, insert a store of - // undef into the alloca right after the alloca itself. - for (unsigned i = 0, e = RetryList.size(); i != e; ++i) { - BasicBlock::iterator BBI = RetryList[i]; - - new StoreInst(UndefValue::get(RetryList[i]->getAllocatedType()), - RetryList[i], ++BBI); - } - - NewAllocas.assign(RetryList.begin(), RetryList.end()); - RetryList.clear(); - PromoteMem2Reg(NewAllocas, RetryList, DT, DF, AST).run(); - NewAllocas.clear(); - } + PromoteMem2Reg(Allocas, DT, DF, AST).run(); }