diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 78caab3b50a..d15b1250944 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -520,6 +520,8 @@ private: /// Holds all of the instructions that we gathered. SetVector GatherSeq; + /// A list of blocks that we are going to CSE. + SmallSet CSEBlocks; /// Numbers instructions in different blocks. DenseMap BlocksNumbers; @@ -1277,6 +1279,7 @@ Value *BoUpSLP::Gather(ArrayRef VL, VectorType *Ty) { Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); if (Instruction *Insrt = dyn_cast(Vec)) { GatherSeq.insert(Insrt); + CSEBlocks.insert(Insrt->getParent()); // Add to our 'need-to-extract' list. if (ScalarToTreeEntry.count(VL[i])) { @@ -1591,8 +1594,7 @@ Value *BoUpSLP::vectorizeTree() { if (PHINode *PN = dyn_cast(Vec)) { Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt()); Value *Ex = Builder.CreateExtractElement(Vec, Lane); - if (Instruction *Ins = dyn_cast(Ex)) - GatherSeq.insert(Ins); + CSEBlocks.insert(PN->getParent()); User->replaceUsesOfWith(Scalar, Ex); } else if (isa(Vec)){ if (PHINode *PH = dyn_cast(User)) { @@ -1600,23 +1602,20 @@ Value *BoUpSLP::vectorizeTree() { if (PH->getIncomingValue(i) == Scalar) { Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator()); Value *Ex = Builder.CreateExtractElement(Vec, Lane); - if (Instruction *Ins = dyn_cast(Ex)) - GatherSeq.insert(Ins); + CSEBlocks.insert(PH->getIncomingBlock(i)); PH->setOperand(i, Ex); } } } else { Builder.SetInsertPoint(cast(User)); Value *Ex = Builder.CreateExtractElement(Vec, Lane); - if (Instruction *Ins = dyn_cast(Ex)) - GatherSeq.insert(Ins); + CSEBlocks.insert(cast(User)->getParent()); User->replaceUsesOfWith(Scalar, Ex); } } else { Builder.SetInsertPoint(F->getEntryBlock().begin()); Value *Ex = Builder.CreateExtractElement(Vec, Lane); - if (Instruction *Ins = dyn_cast(Ex)) - GatherSeq.insert(Ins); + CSEBlocks.insert(&F->getEntryBlock()); User->replaceUsesOfWith(Scalar, Ex); } @@ -1679,9 +1678,6 @@ public: void BoUpSLP::optimizeGatherSequence() { DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size() << " gather sequences instructions.\n"); - // Keep a list of visited BBs to run CSE on. It is typically small. - SmallPtrSet VisitedBBs; - SmallVector CSEWorkList; // LICM InsertElementInst sequences. for (SetVector::iterator it = GatherSeq.begin(), e = GatherSeq.end(); it != e; ++it) { @@ -1690,9 +1686,6 @@ void BoUpSLP::optimizeGatherSequence() { if (!Insert) continue; - if (VisitedBBs.insert(Insert->getParent())) - CSEWorkList.push_back(Insert->getParent()); - // Check if this block is inside a loop. Loop *L = LI->getLoopFor(Insert->getParent()); if (!L) @@ -1719,6 +1712,7 @@ void BoUpSLP::optimizeGatherSequence() { // Sort blocks by domination. This ensures we visit a block after all blocks // dominating it are visited. + SmallVector CSEWorkList(CSEBlocks.begin(), CSEBlocks.end()); std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), DTCmp(DT)); // Perform O(N^2) search over the gather sequences and merge identical @@ -1734,8 +1728,7 @@ void BoUpSLP::optimizeGatherSequence() { // For all instructions in blocks containing gather sequences: for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { Instruction *In = it++; - if ((!isa(In) && !isa(In)) || - !GatherSeq.count(In)) + if (!isa(In) && !isa(In)) continue; // Check if we can replace this instruction with any of the @@ -1757,6 +1750,8 @@ void BoUpSLP::optimizeGatherSequence() { } } } + CSEBlocks.clear(); + GatherSeq.clear(); } /// The SLPVectorizer Pass. diff --git a/test/Transforms/SLPVectorizer/X86/pr18060.ll b/test/Transforms/SLPVectorizer/X86/pr18060.ll index de9a4c3c245..e6813f3b315 100644 --- a/test/Transforms/SLPVectorizer/X86/pr18060.ll +++ b/test/Transforms/SLPVectorizer/X86/pr18060.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 +; RUN: opt < %s -slp-vectorizer -S -mtriple=i386-pc-linux target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" target triple = "i386-pc-linux"