diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 7875ca79d1c..557cd6d2b53 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -27,7 +27,12 @@ using namespace llvm; STATISTIC(NumSimplify, "Number of insts simplified or DCE'd"); -STATISTIC(NumCSE, "Number of insts CSE'd"); +STATISTIC(NumCSE, "Number of insts CSE'd"); +STATISTIC(NumCSEMem, "Number of load and call insts CSE'd"); + +static unsigned getHash(const void *V) { + return DenseMapInfo::getHashValue(V); +} //===----------------------------------------------------------------------===// // SimpleValue @@ -78,10 +83,6 @@ template<> struct DenseMapInfo { }; } -unsigned getHash(const void *V) { - return DenseMapInfo::getHashValue(V); -} - unsigned DenseMapInfo::getHashValue(SimpleValue Val) { Instruction *Inst = Val.Inst; @@ -124,9 +125,77 @@ bool DenseMapInfo::isEqual(SimpleValue LHS, SimpleValue RHS) { return LHSI->isIdenticalTo(RHSI); } +//===----------------------------------------------------------------------===// +// MemoryValue +//===----------------------------------------------------------------------===// + +namespace { + /// MemoryValue - Instances of this struct represent available load and call + /// values in the scoped hash table. + struct MemoryValue { + Instruction *Inst; + + bool isSentinel() const { + return Inst == DenseMapInfo::getEmptyKey() || + Inst == DenseMapInfo::getTombstoneKey(); + } + + static bool canHandle(Instruction *Inst) { + if (LoadInst *LI = dyn_cast(Inst)) + return !LI->isVolatile(); + if (CallInst *CI = dyn_cast(Inst)) + return CI->onlyReadsMemory(); + return false; + } + + static MemoryValue get(Instruction *I) { + MemoryValue X; X.Inst = I; + assert((X.isSentinel() || canHandle(I)) && "Inst can't be handled!"); + return X; + } + }; +} + +namespace llvm { + // MemoryValue is POD. + template<> struct isPodLike { + static const bool value = true; + }; + + template<> struct DenseMapInfo { + static inline MemoryValue getEmptyKey() { + return MemoryValue::get(DenseMapInfo::getEmptyKey()); + } + static inline MemoryValue getTombstoneKey() { + return MemoryValue::get(DenseMapInfo::getTombstoneKey()); + } + static unsigned getHashValue(MemoryValue Val); + static bool isEqual(MemoryValue LHS, MemoryValue RHS); + }; +} +unsigned DenseMapInfo::getHashValue(MemoryValue Val) { + Instruction *Inst = Val.Inst; + // Hash in all of the operands as pointers. + unsigned Res = 0; + for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) + Res ^= getHash(Inst->getOperand(i)) << i; + // Mix in the opcode. + return (Res << 1) ^ Inst->getOpcode(); +} + +bool DenseMapInfo::isEqual(MemoryValue LHS, MemoryValue RHS) { + Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst; + + if (LHS.isSentinel() || RHS.isSentinel()) + return LHSI == RHSI; + + if (LHSI->getOpcode() != RHSI->getOpcode()) return false; + return LHSI->isIdenticalTo(RHSI); +} + //===----------------------------------------------------------------------===// -// EarlyCSE pass +// EarlyCSE pass. //===----------------------------------------------------------------------===// namespace { @@ -152,7 +221,21 @@ public: /// we find, otherwise we insert them so that dominated values can succeed in /// their lookup. ScopedHTType *AvailableValues; - + + typedef ScopedHashTable > MemHTType; + /// AvailableMemValues - This scoped hash table contains the current values of + /// loads and other read-only memory values. This allows us to get efficient + /// access to dominating loads we we find a fully redundant load. In addition + /// to the most recent load, we keep track of a generation count of the read, + /// which is compared against the current generation count. The current + /// generation count is incremented after every possibly writing memory + /// operation, which ensures that we only CSE loads with other loads that have + /// no intervening store. + MemHTType *AvailableMemValues; + + /// CurrentGeneration - This is the current generation of the memory value. + unsigned CurrentGeneration; + static char ID; explicit EarlyCSE() : FunctionPass(ID) { initializeEarlyCSEPass(*PassRegistry::getPassRegistry()); @@ -187,11 +270,23 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Define a scope in the scoped hash table. When we are done processing this // domtree node and recurse back up to our parent domtree node, this will pop // off all the values we install. - ScopedHashTableScope, - AllocatorTy> Scope(*AvailableValues); + ScopedHTType::ScopeTy Scope(*AvailableValues); + + // Define a scope for the memory values so that anything we add will get + // popped when we recurse back up to our parent domtree node. + MemHTType::ScopeTy MemScope(*AvailableMemValues); BasicBlock *BB = Node->getBlock(); + // If this block has a single predecessor, then the predecessor is the parent + // of the domtree node and all of the live out memory values are still current + // in this block. If this block has multiple predecessors, then they could + // have invalidated the live-out memory values of our parent value. For now, + // just be conservative and invalidate memory if this block has multiple + // predecessors. + if (BB->getSinglePredecessor() == 0) + ++CurrentGeneration; + bool Changed = false; // See if any instructions in the block can be eliminated. If so, do it. If @@ -219,27 +314,58 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { continue; } - // If this instruction is something that we can't value number, ignore it. - if (!SimpleValue::canHandle(Inst)) - continue; - - // See if the instruction has an available value. If so, use it. - if (Value *V = AvailableValues->lookup(SimpleValue::get(Inst))) { - DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n'); - Inst->replaceAllUsesWith(V); - Inst->eraseFromParent(); - Changed = true; - ++NumCSE; + // If this is a simple instruction that we can value number, process it. + if (SimpleValue::canHandle(Inst)) { + // See if the instruction has an available value. If so, use it. + if (Value *V = AvailableValues->lookup(SimpleValue::get(Inst))) { + DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n'); + Inst->replaceAllUsesWith(V); + Inst->eraseFromParent(); + Changed = true; + ++NumCSE; + continue; + } + + // Otherwise, just remember that this value is available. + AvailableValues->insert(SimpleValue::get(Inst), Inst); continue; } - // Otherwise, just remember that this value is available. - AvailableValues->insert(SimpleValue::get(Inst), Inst); + // If this is a read-only memory value, process it. + if (MemoryValue::canHandle(Inst)) { + // If we have an available version of this value, and if it is the right + // generation, replace this instruction. + std::pair InVal = + AvailableMemValues->lookup(MemoryValue::get(Inst)); + if (InVal.first != 0 && InVal.second == CurrentGeneration) { + DEBUG(dbgs() << "EarlyCSE CSE MEM: " << *Inst << " to: " + << *InVal.first << '\n'); + if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); + Inst->eraseFromParent(); + Changed = true; + ++NumCSEMem; + continue; + } + + // Otherwise, remember that we have this instruction. + AvailableMemValues->insert(MemoryValue::get(Inst), + std::pair(Inst, CurrentGeneration)); + continue; + } + + // Okay, this isn't something we can CSE at all. Check to see if it is + // something that could modify memory. If so, our available memory values + // cannot be used so bump the generation count. + if (Inst->mayWriteToMemory()) + ++CurrentGeneration; } - - for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) + unsigned LiveOutGeneration = CurrentGeneration; + for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) { Changed |= processNode(*I); + // Pop any generation changes off the stack from the recursive walk. + CurrentGeneration = LiveOutGeneration; + } return Changed; } @@ -250,5 +376,9 @@ bool EarlyCSE::runOnFunction(Function &F) { ScopedHTType AVTable; AvailableValues = &AVTable; + MemHTType MemTable; + AvailableMemValues = &MemTable; + + CurrentGeneration = 0; return processNode(DT->getRootNode()); } diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll index 6cfd0a9df98..a761ef764c6 100644 --- a/test/Transforms/EarlyCSE/basic.ll +++ b/test/Transforms/EarlyCSE/basic.ll @@ -30,3 +30,48 @@ define void @test1(i8 %V, i32 *%P) { ; CHECK-NEXT: volatile store i32 %G ret void } + + +;; Simple load value numbering. +; CHECK: @test2 +define i32 @test2(i32 *%P) { + %V1 = load i32* %P + %V2 = load i32* %P + %Diff = sub i32 %V1, %V2 + ret i32 %Diff + ; CHECK: ret i32 0 +} + +;; Cross block load value numbering. +; CHECK: @test3 +define i32 @test3(i32 *%P, i1 %Cond) { + %V1 = load i32* %P + br i1 %Cond, label %T, label %F +T: + store i32 4, i32* %P + ret i32 42 +F: + %V2 = load i32* %P + %Diff = sub i32 %V1, %V2 + ret i32 %Diff + ; CHECK: F: + ; CHECK: ret i32 0 +} + +;; Cross block load value numbering stops when stores happen. +; CHECK: @test4 +define i32 @test4(i32 *%P, i1 %Cond) { + %V1 = load i32* %P + br i1 %Cond, label %T, label %F +T: + ret i32 42 +F: + ; Clobbers V1 + store i32 42, i32* %P + + %V2 = load i32* %P + %Diff = sub i32 %V1, %V2 + ret i32 %Diff + ; CHECK: F: + ; CHECK: ret i32 %Diff +}