From 1ee3c0008be6f7012aa69f7a73a2819300bdf23f Mon Sep 17 00:00:00 2001
From: Arnold Schwaighofer <aschwaighofer@apple.com>
Date: Mon, 7 Oct 2013 21:05:43 +0000
Subject: [PATCH] LoopVectorize: External uses must use the last value in a
 reduction cycle

Otherwise, we don't perform operations that would have been performed on
the scalar version.

Fixes PR17498.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192133 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp |  6 +++++
 test/Transforms/LoopVectorize/reduction.ll | 27 ++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index a71df08aba0..415f86e8938 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4015,6 +4015,12 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
         if (ExitInstruction != 0 || Cur == Phi)
           return false;
 
+        // The instruction used by an outside user must be the last instruction
+        // before we feed back to the reduction phi. Otherwise, we loose VF-1
+        // operations on the value.
+        if (std::find(Phi->op_begin(), Phi->op_end(), Cur) == Phi->op_end())
+         return false;
+
         ExitInstruction = Cur;
         continue;
       }
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
index 18a0a9341e0..791fce15622 100644
--- a/test/Transforms/LoopVectorize/reduction.ll
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -467,3 +467,30 @@ for.body:
 for.end:
   ret i32 %p.addr.02
 }
+
+; Don't vectorize a reduction value that is not the last in a reduction cyle. We
+; would loose iterations (VF-1) on the operations after that use.
+; PR17498
+
+; CHECK-LABEL: not_last_operation
+; CHECK-NOT: x i32>
+define i32 @not_last_operation(i32 %p, i32 %val) {
+entry:
+  %tobool = icmp eq i32 %p, 0
+  br label %for.body
+
+for.body:
+  %inc613.1 = phi i32 [ 0, %entry ], [ %inc6.1, %for.body ]
+  %inc511.1 = phi i32 [ %val, %entry ], [ %inc5.1, %for.body ]
+  %0 = zext i1 %tobool to i32
+  %inc4.1 = xor i32 %0, 1
+  %inc511.1.inc4.1 = add nsw i32 %inc511.1, %inc4.1
+  %inc5.1 = add nsw i32 %inc511.1.inc4.1, 1
+  %inc6.1 = add nsw i32 %inc613.1, 1
+  %exitcond.1 = icmp eq i32 %inc6.1, 22
+  br i1 %exitcond.1, label %exit, label %for.body
+
+exit:
+  %inc.2 = add nsw i32 %inc511.1.inc4.1, 2
+  ret i32 %inc.2
+}