From a9cc4e7e35940bf96b657f60ff241784cef9a0ac Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 13 Nov 2014 21:17:58 +0000 Subject: [PATCH] [GVN] Perform Scalar PRE on gep indices that feed loads before doing Load PRE. Phabricator Revision: http://reviews.llvm.org/D6103 Patch by "Balaram Makam" ! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221924 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/GVN.cpp | 340 ++++++++++++++-------------- test/Transforms/GVN/pre-gep-load.ll | 49 ++++ 2 files changed, 219 insertions(+), 170 deletions(-) create mode 100644 test/Transforms/GVN/pre-gep-load.ll diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 7dba4e2d3ab..527921672a8 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -709,6 +710,7 @@ namespace { void dump(DenseMap &d); bool iterateOnFunction(Function &F); bool performPRE(Function &F); + bool performScalarPRE(Instruction *I); Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; @@ -1729,6 +1731,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return false; } + // If this load follows a GEP, see if we can PRE the indices before analyzing. + if (GetElementPtrInst *GEP = dyn_cast(LI->getOperand(0))) { + for(GetElementPtrInst::op_iterator OI = GEP->idx_begin(), + OE = GEP->idx_end(); OI != OE; ++OI) + if (Instruction *I = dyn_cast(OI->get())) + performScalarPRE(I); + } + // Step 2: Analyze the availability of the load AvailValInBlkVect ValuesPerBlock; UnavailBlkVect UnavailableBlocks; @@ -2431,11 +2441,169 @@ bool GVN::processBlock(BasicBlock *BB) { return ChangedFunction; } +bool GVN::performScalarPRE(Instruction *CurInst) { + SmallVector, 8> predMap; + + if (isa(CurInst) || + isa(CurInst) || isa(CurInst) || + CurInst->getType()->isVoidTy() || + CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || + isa(CurInst)) + return false; + + // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from + // sinking the compare again, and it would force the code generator to + // move the i1 from processor flags or predicate registers into a general + // purpose register. + if (isa(CurInst)) + return false; + + // We don't currently value number ANY inline asm calls. + if (CallInst *CallI = dyn_cast(CurInst)) + if (CallI->isInlineAsm()) + return false; + + uint32_t ValNo = VN.lookup(CurInst); + + // Look for the predecessors for PRE opportunities. We're + // only trying to solve the basic diamond case, where + // a value is computed in the successor and one predecessor, + // but not the other. We also explicitly disallow cases + // where the successor is its own predecessor, because they're + // more complicated to get right. + unsigned NumWith = 0; + unsigned NumWithout = 0; + BasicBlock *PREPred = nullptr; + BasicBlock *CurrentBlock = CurInst->getParent(); + predMap.clear(); + + for (pred_iterator PI = pred_begin(CurrentBlock), + PE = pred_end(CurrentBlock); PI != PE; ++PI) { + BasicBlock *P = *PI; + // We're not interested in PRE where the block is its + // own predecessor, or in blocks with predecessors + // that are not reachable. + if (P == CurrentBlock) { + NumWithout = 2; + break; + } else if (!DT->isReachableFromEntry(P)) { + NumWithout = 2; + break; + } + + Value* predV = findLeader(P, ValNo); + if (!predV) { + predMap.push_back(std::make_pair(static_cast(nullptr), P)); + PREPred = P; + ++NumWithout; + } else if (predV == CurInst) { + /* CurInst dominates this predecessor. */ + NumWithout = 2; + break; + } else { + predMap.push_back(std::make_pair(predV, P)); + ++NumWith; + } + } + + // Don't do PRE when it might increase code size, i.e. when + // we would need to insert instructions in more than one pred. + if (NumWithout != 1 || NumWith == 0) + return false; + + // Don't do PRE across indirect branch. + if (isa(PREPred->getTerminator())) + return false; + + // We can't do PRE safely on a critical edge, so instead we schedule + // the edge to be split and perform the PRE the next time we iterate + // on the function. + unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); + if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { + toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); + return false; + } + + // Instantiate the expression in the predecessor that lacked it. + // Because we are going top-down through the block, all value numbers + // will be available in the predecessor by the time we need them. Any + // that weren't originally present will have been instantiated earlier + // in this loop. + Instruction *PREInstr = CurInst->clone(); + bool success = true; + for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { + Value *Op = PREInstr->getOperand(i); + if (isa(Op) || isa(Op) || isa(Op)) + continue; + + if (Value *V = findLeader(PREPred, VN.lookup(Op))) { + PREInstr->setOperand(i, V); + } else { + success = false; + break; + } + } + + // Fail out if we encounter an operand that is not available in + // the PRE predecessor. This is typically because of loads which + // are not value numbered precisely. + if (!success) { + DEBUG(verifyRemoved(PREInstr)); + delete PREInstr; + return false; + } + + PREInstr->insertBefore(PREPred->getTerminator()); + PREInstr->setName(CurInst->getName() + ".pre"); + PREInstr->setDebugLoc(CurInst->getDebugLoc()); + VN.add(PREInstr, ValNo); + ++NumGVNPRE; + + // Update the availability map to include the new instruction. + addToLeaderTable(ValNo, PREInstr, PREPred); + + // Create a PHI to make the value available in this block. + PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(), + CurInst->getName() + ".pre-phi", + CurrentBlock->begin()); + for (unsigned i = 0, e = predMap.size(); i != e; ++i) { + if (Value *V = predMap[i].first) + Phi->addIncoming(V, predMap[i].second); + else + Phi->addIncoming(PREInstr, PREPred); + } + + VN.add(Phi, ValNo); + addToLeaderTable(ValNo, Phi, CurrentBlock); + Phi->setDebugLoc(CurInst->getDebugLoc()); + CurInst->replaceAllUsesWith(Phi); + if (Phi->getType()->getScalarType()->isPointerTy()) { + // Because we have added a PHI-use of the pointer value, it has now + // "escaped" from alias analysis' perspective. We need to inform + // AA of this. + for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; + ++ii) { + unsigned jj = PHINode::getOperandNumForIncomingValue(ii); + VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); + } + + if (MD) + MD->invalidateCachedPointerInfo(Phi); + } + VN.erase(CurInst); + removeFromLeaderTable(ValNo, CurInst, CurrentBlock); + + DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); + if (MD) MD->removeInstruction(CurInst); + DEBUG(verifyRemoved(CurInst)); + CurInst->eraseFromParent(); + return true; +} + /// performPRE - Perform a purely local form of PRE that looks for diamond /// control flow patterns and attempts to perform simple PRE at the join point. bool GVN::performPRE(Function &F) { bool Changed = false; - SmallVector, 8> predMap; for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) { // Nothing to PRE in the entry block. if (CurrentBlock == &F.getEntryBlock()) continue; @@ -2446,160 +2614,7 @@ bool GVN::performPRE(Function &F) { for (BasicBlock::iterator BI = CurrentBlock->begin(), BE = CurrentBlock->end(); BI != BE; ) { Instruction *CurInst = BI++; - - if (isa(CurInst) || - isa(CurInst) || isa(CurInst) || - CurInst->getType()->isVoidTy() || - CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || - isa(CurInst)) - continue; - - // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from - // sinking the compare again, and it would force the code generator to - // move the i1 from processor flags or predicate registers into a general - // purpose register. - if (isa(CurInst)) - continue; - - // We don't currently value number ANY inline asm calls. - if (CallInst *CallI = dyn_cast(CurInst)) - if (CallI->isInlineAsm()) - continue; - - uint32_t ValNo = VN.lookup(CurInst); - - // Look for the predecessors for PRE opportunities. We're - // only trying to solve the basic diamond case, where - // a value is computed in the successor and one predecessor, - // but not the other. We also explicitly disallow cases - // where the successor is its own predecessor, because they're - // more complicated to get right. - unsigned NumWith = 0; - unsigned NumWithout = 0; - BasicBlock *PREPred = nullptr; - predMap.clear(); - - for (pred_iterator PI = pred_begin(CurrentBlock), - PE = pred_end(CurrentBlock); PI != PE; ++PI) { - BasicBlock *P = *PI; - // We're not interested in PRE where the block is its - // own predecessor, or in blocks with predecessors - // that are not reachable. - if (P == CurrentBlock) { - NumWithout = 2; - break; - } else if (!DT->isReachableFromEntry(P)) { - NumWithout = 2; - break; - } - - Value* predV = findLeader(P, ValNo); - if (!predV) { - predMap.push_back(std::make_pair(static_cast(nullptr), P)); - PREPred = P; - ++NumWithout; - } else if (predV == CurInst) { - /* CurInst dominates this predecessor. */ - NumWithout = 2; - break; - } else { - predMap.push_back(std::make_pair(predV, P)); - ++NumWith; - } - } - - // Don't do PRE when it might increase code size, i.e. when - // we would need to insert instructions in more than one pred. - if (NumWithout != 1 || NumWith == 0) - continue; - - // Don't do PRE across indirect branch. - if (isa(PREPred->getTerminator())) - continue; - - // We can't do PRE safely on a critical edge, so instead we schedule - // the edge to be split and perform the PRE the next time we iterate - // on the function. - unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); - if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { - toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); - continue; - } - - // Instantiate the expression in the predecessor that lacked it. - // Because we are going top-down through the block, all value numbers - // will be available in the predecessor by the time we need them. Any - // that weren't originally present will have been instantiated earlier - // in this loop. - Instruction *PREInstr = CurInst->clone(); - bool success = true; - for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { - Value *Op = PREInstr->getOperand(i); - if (isa(Op) || isa(Op) || isa(Op)) - continue; - - if (Value *V = findLeader(PREPred, VN.lookup(Op))) { - PREInstr->setOperand(i, V); - } else { - success = false; - break; - } - } - - // Fail out if we encounter an operand that is not available in - // the PRE predecessor. This is typically because of loads which - // are not value numbered precisely. - if (!success) { - DEBUG(verifyRemoved(PREInstr)); - delete PREInstr; - continue; - } - - PREInstr->insertBefore(PREPred->getTerminator()); - PREInstr->setName(CurInst->getName() + ".pre"); - PREInstr->setDebugLoc(CurInst->getDebugLoc()); - VN.add(PREInstr, ValNo); - ++NumGVNPRE; - - // Update the availability map to include the new instruction. - addToLeaderTable(ValNo, PREInstr, PREPred); - - // Create a PHI to make the value available in this block. - PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(), - CurInst->getName() + ".pre-phi", - CurrentBlock->begin()); - for (unsigned i = 0, e = predMap.size(); i != e; ++i) { - if (Value *V = predMap[i].first) - Phi->addIncoming(V, predMap[i].second); - else - Phi->addIncoming(PREInstr, PREPred); - } - - VN.add(Phi, ValNo); - addToLeaderTable(ValNo, Phi, CurrentBlock); - Phi->setDebugLoc(CurInst->getDebugLoc()); - CurInst->replaceAllUsesWith(Phi); - if (Phi->getType()->getScalarType()->isPointerTy()) { - // Because we have added a PHI-use of the pointer value, it has now - // "escaped" from alias analysis' perspective. We need to inform - // AA of this. - for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; - ++ii) { - unsigned jj = PHINode::getOperandNumForIncomingValue(ii); - VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); - } - - if (MD) - MD->invalidateCachedPointerInfo(Phi); - } - VN.erase(CurInst); - removeFromLeaderTable(ValNo, CurInst, CurrentBlock); - - DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); - if (MD) MD->removeInstruction(CurInst); - DEBUG(verifyRemoved(CurInst)); - CurInst->eraseFromParent(); - Changed = true; + Changed = performScalarPRE(CurInst); } } @@ -2637,26 +2652,11 @@ bool GVN::iterateOnFunction(Function &F) { // Top-down walk of the dominator tree bool Changed = false; -#if 0 // Needed for value numbering with phi construction to work. ReversePostOrderTraversal RPOT(&F); for (ReversePostOrderTraversal::rpo_iterator RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) Changed |= processBlock(*RI); -#else - // Save the blocks this function have before transformation begins. GVN may - // split critical edge, and hence may invalidate the RPO/DT iterator. - // - std::vector BBVect; - BBVect.reserve(256); - for (DomTreeNode *X : depth_first(DT->getRootNode())) - BBVect.push_back(X->getBlock()); - - for (std::vector::iterator I = BBVect.begin(), E = BBVect.end(); - I != E; I++) - Changed |= processBlock(*I); -#endif - return Changed; } diff --git a/test/Transforms/GVN/pre-gep-load.ll b/test/Transforms/GVN/pre-gep-load.ll new file mode 100644 index 00000000000..3ee3a37b81a --- /dev/null +++ b/test/Transforms/GVN/pre-gep-load.ll @@ -0,0 +1,49 @@ +; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +define double @foo(i32 %stat, i32 %i, double** %p) { +; CHECK-LABEL: @foo( +entry: + switch i32 %stat, label %sw.default [ + i32 0, label %sw.bb + i32 1, label %sw.bb + i32 2, label %sw.bb2 + ] + +sw.bb: ; preds = %entry, %entry + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds double** %p, i64 0 + %0 = load double** %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds double* %0, i64 %idxprom + %1 = load double* %arrayidx1, align 8 + %sub = fsub double %1, 1.000000e+00 + %cmp = fcmp olt double %sub, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %sw.bb + br label %return + +if.end: ; preds = %sw.bb + br label %sw.bb2 + +sw.bb2: ; preds = %if.end, %entry + %idxprom3 = sext i32 %i to i64 + %arrayidx4 = getelementptr inbounds double** %p, i64 0 + %2 = load double** %arrayidx4, align 8 + %arrayidx5 = getelementptr inbounds double* %2, i64 %idxprom3 + %3 = load double* %arrayidx5, align 8 +; CHECK: sw.bb2: +; CHECK-NEXT-NOT: sext +; CHECK-NEXT: phi double [ +; CHECK-NOT: load + %sub6 = fsub double 3.000000e+00, %3 + br label %return + +sw.default: ; preds = %entry + br label %return + +return: ; preds = %sw.default, %sw.bb2, %if.then + %retval.0 = phi double [ 0.000000e+00, %sw.default ], [ %sub6, %sw.bb2 ], [ %sub, %if.then ] + ret double %retval.0 +}