diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index bb37994e9ff..33d1100ebc7 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1302,6 +1302,7 @@ void FuncSLP::optimizeGatherSequence() { // instructions. TODO: We can further optimize this scan if we split the // instructions into different buckets based on the insert lane. SmallPtrSet<Instruction*, 16> Visited; + SmallPtrSet<Instruction*, 16> ToRemove; ReversePostOrderTraversal<Function*> RPOT(F); for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { @@ -1319,6 +1320,7 @@ void FuncSLP::optimizeGatherSequence() { if (Insert->isIdenticalTo(*v) && DT->dominates((*v)->getParent(), Insert->getParent())) { Insert->replaceAllUsesWith(*v); + ToRemove.insert(Insert); Insert = 0; break; } @@ -1327,6 +1329,13 @@ void FuncSLP::optimizeGatherSequence() { Visited.insert(Insert); } } + + // Erase all of the instructions that we RAUWed. + for (SmallPtrSet<Instruction*, 16>::iterator v = ToRemove.begin(), + ve = ToRemove.end(); v != ve; ++v) { + assert((*v)->getNumUses() == 0 && "Can't remove instructions with uses"); + (*v)->eraseFromParent(); + } } /// The SLPVectorizer Pass. diff --git a/test/Transforms/SLPVectorizer/X86/cse.ll b/test/Transforms/SLPVectorizer/X86/cse.ll index 1c1e4b56b6f..17b966bd962 100644 --- a/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/test/Transforms/SLPVectorizer/X86/cse.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s +; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" target triple = "i386-apple-macosx10.8.0"