PR1860 - We can't save a list of ExtractElement instructions to CSE because some of these instructions may be removed and optimized in future iterations. Instead, we save a list of basic blocks that we need to CSE.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195791 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem 2013-11-26 22:24:25 +00:00
parent 3fd6a64030
commit 7e8ff837e6
2 changed files with 12 additions and 17 deletions

View File

@ -520,6 +520,8 @@ private:
/// Holds all of the instructions that we gathered. /// Holds all of the instructions that we gathered.
SetVector<Instruction *> GatherSeq; SetVector<Instruction *> GatherSeq;
/// A list of blocks that we are going to CSE.
SmallSet<BasicBlock *, 8> CSEBlocks;
/// Numbers instructions in different blocks. /// Numbers instructions in different blocks.
DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers; DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers;
@ -1277,6 +1279,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) { if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) {
GatherSeq.insert(Insrt); GatherSeq.insert(Insrt);
CSEBlocks.insert(Insrt->getParent());
// Add to our 'need-to-extract' list. // Add to our 'need-to-extract' list.
if (ScalarToTreeEntry.count(VL[i])) { if (ScalarToTreeEntry.count(VL[i])) {
@ -1591,8 +1594,7 @@ Value *BoUpSLP::vectorizeTree() {
if (PHINode *PN = dyn_cast<PHINode>(Vec)) { if (PHINode *PN = dyn_cast<PHINode>(Vec)) {
Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt()); Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt());
Value *Ex = Builder.CreateExtractElement(Vec, Lane); Value *Ex = Builder.CreateExtractElement(Vec, Lane);
if (Instruction *Ins = dyn_cast<Instruction>(Ex)) CSEBlocks.insert(PN->getParent());
GatherSeq.insert(Ins);
User->replaceUsesOfWith(Scalar, Ex); User->replaceUsesOfWith(Scalar, Ex);
} else if (isa<Instruction>(Vec)){ } else if (isa<Instruction>(Vec)){
if (PHINode *PH = dyn_cast<PHINode>(User)) { if (PHINode *PH = dyn_cast<PHINode>(User)) {
@ -1600,23 +1602,20 @@ Value *BoUpSLP::vectorizeTree() {
if (PH->getIncomingValue(i) == Scalar) { if (PH->getIncomingValue(i) == Scalar) {
Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator()); Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
Value *Ex = Builder.CreateExtractElement(Vec, Lane); Value *Ex = Builder.CreateExtractElement(Vec, Lane);
if (Instruction *Ins = dyn_cast<Instruction>(Ex)) CSEBlocks.insert(PH->getIncomingBlock(i));
GatherSeq.insert(Ins);
PH->setOperand(i, Ex); PH->setOperand(i, Ex);
} }
} }
} else { } else {
Builder.SetInsertPoint(cast<Instruction>(User)); Builder.SetInsertPoint(cast<Instruction>(User));
Value *Ex = Builder.CreateExtractElement(Vec, Lane); Value *Ex = Builder.CreateExtractElement(Vec, Lane);
if (Instruction *Ins = dyn_cast<Instruction>(Ex)) CSEBlocks.insert(cast<Instruction>(User)->getParent());
GatherSeq.insert(Ins);
User->replaceUsesOfWith(Scalar, Ex); User->replaceUsesOfWith(Scalar, Ex);
} }
} else { } else {
Builder.SetInsertPoint(F->getEntryBlock().begin()); Builder.SetInsertPoint(F->getEntryBlock().begin());
Value *Ex = Builder.CreateExtractElement(Vec, Lane); Value *Ex = Builder.CreateExtractElement(Vec, Lane);
if (Instruction *Ins = dyn_cast<Instruction>(Ex)) CSEBlocks.insert(&F->getEntryBlock());
GatherSeq.insert(Ins);
User->replaceUsesOfWith(Scalar, Ex); User->replaceUsesOfWith(Scalar, Ex);
} }
@ -1679,9 +1678,6 @@ public:
void BoUpSLP::optimizeGatherSequence() { void BoUpSLP::optimizeGatherSequence() {
DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size() DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
<< " gather sequences instructions.\n"); << " gather sequences instructions.\n");
// Keep a list of visited BBs to run CSE on. It is typically small.
SmallPtrSet<BasicBlock *, 4> VisitedBBs;
SmallVector<BasicBlock *, 4> CSEWorkList;
// LICM InsertElementInst sequences. // LICM InsertElementInst sequences.
for (SetVector<Instruction *>::iterator it = GatherSeq.begin(), for (SetVector<Instruction *>::iterator it = GatherSeq.begin(),
e = GatherSeq.end(); it != e; ++it) { e = GatherSeq.end(); it != e; ++it) {
@ -1690,9 +1686,6 @@ void BoUpSLP::optimizeGatherSequence() {
if (!Insert) if (!Insert)
continue; continue;
if (VisitedBBs.insert(Insert->getParent()))
CSEWorkList.push_back(Insert->getParent());
// Check if this block is inside a loop. // Check if this block is inside a loop.
Loop *L = LI->getLoopFor(Insert->getParent()); Loop *L = LI->getLoopFor(Insert->getParent());
if (!L) if (!L)
@ -1719,6 +1712,7 @@ void BoUpSLP::optimizeGatherSequence() {
// Sort blocks by domination. This ensures we visit a block after all blocks // Sort blocks by domination. This ensures we visit a block after all blocks
// dominating it are visited. // dominating it are visited.
SmallVector<BasicBlock *, 8> CSEWorkList(CSEBlocks.begin(), CSEBlocks.end());
std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), DTCmp(DT)); std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), DTCmp(DT));
// Perform O(N^2) search over the gather sequences and merge identical // Perform O(N^2) search over the gather sequences and merge identical
@ -1734,8 +1728,7 @@ void BoUpSLP::optimizeGatherSequence() {
// For all instructions in blocks containing gather sequences: // For all instructions in blocks containing gather sequences:
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
Instruction *In = it++; Instruction *In = it++;
if ((!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In)) || if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
!GatherSeq.count(In))
continue; continue;
// Check if we can replace this instruction with any of the // Check if we can replace this instruction with any of the
@ -1757,6 +1750,8 @@ void BoUpSLP::optimizeGatherSequence() {
} }
} }
} }
CSEBlocks.clear();
GatherSeq.clear();
} }
/// The SLPVectorizer Pass. /// The SLPVectorizer Pass.

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 ; RUN: opt < %s -slp-vectorizer -S -mtriple=i386-pc-linux
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
target triple = "i386-pc-linux" target triple = "i386-pc-linux"