From 67a38a2875f05ea9c219ab73c4398ee675eb4292 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 26 Jul 2013 23:07:55 +0000 Subject: [PATCH] SLP Vectorizer: Don't vectorize really short chains because they are already handled by the SelectionDAG store-vectorizer, which does a better job in deciding when to vectorize. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187267 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 8 ++++++-- test/Transforms/SLPVectorizer/X86/reduction2.ll | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index c1accd32c5c..50e37e99169 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -898,8 +898,12 @@ int BoUpSLP::getTreeCost() { DEBUG(dbgs() << "SLP: Calculating cost for tree of size " << VectorizableTree.size() << ".\n"); - if (!VectorizableTree.size()) { - assert(!ExternalUses.size() && "We should not have any external users"); + // Don't vectorize tiny trees. Small load/store chains or consecutive stores + // of constants will be vectorized in SelectionDAG in MergeConsecutiveStores. 
+ if (VectorizableTree.size() < 3) { + if (!VectorizableTree.size()) { + assert(!ExternalUses.size() && "We should not have any external users"); + } return 0; } diff --git a/test/Transforms/SLPVectorizer/X86/reduction2.ll b/test/Transforms/SLPVectorizer/X86/reduction2.ll index 1dc77d2ef5b..f21e86c5646 100644 --- a/test/Transforms/SLPVectorizer/X86/reduction2.ll +++ b/test/Transforms/SLPVectorizer/X86/reduction2.ll @@ -16,11 +16,13 @@ define double @foo(double* nocapture %D) { %3 = getelementptr inbounds double* %D, i32 %2 %4 = load double* %3, align 4 %A4 = fmul double %4, %4 + %A42 = fmul double %A4, %A4 %5 = or i32 %2, 1 %6 = getelementptr inbounds double* %D, i32 %5 %7 = load double* %6, align 4 %A7 = fmul double %7, %7 - %8 = fadd double %A4, %A7 + %A72 = fmul double %A7, %A7 + %8 = fadd double %A42, %A72 %9 = fadd double %sum.01, %8 %10 = add nsw i32 %i.02, 1 %exitcond = icmp eq i32 %10, 100