Insert the reduction start value into the first bypass block to preserve domination.

Fixes PR15344. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176701 91177308-0d34-0410-b5e6-96231b3b80d8
2025-02-19 06:31:18 +00:00 · 2013-03-08 16:58:37 +00:00 · 2013-03-08 16:58:37 +00:00 · f22d9cfa6d
commit f22d9cfa6d
parent 7893d29c62
2 changed files with 36 additions and 1 deletions
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@ -1643,7 +1643,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
    // To do so, we need to generate the 'identity' vector and overide
    // one of the elements with the incoming scalar reduction. We need
    // to do it in the vector-loop preheader.
-    Builder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator());
+    Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());

    // This is the vector-clone of the value that leaves the loop.
    VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
--- a/test/Transforms/LoopVectorize/X86/reduction-crash.ll
+++ b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
@ -0,0 +1,35 @@
+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-apple-darwin"
+
+; PR15344
+define void @test1(float* nocapture %arg, i32 %arg1) nounwind {
+; CHECK: @test1
+; CHECK: preheader
+; CHECK: insertelement <2 x double> zeroinitializer, double %tmp, i32 0
+; CHECK: vector.memcheck
+
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb
+  %tmp = load double* null, align 8
+  br i1 undef, label %bb3, label %bb12
+
+bb3:                                              ; preds = %bb3, %bb2
+  %tmp4 = phi double [ %tmp9, %bb3 ], [ %tmp, %bb2 ]
+  %tmp5 = phi i32 [ %tmp8, %bb3 ], [ 0, %bb2 ]
+  %tmp6 = getelementptr inbounds [16 x double]* undef, i32 0, i32 %tmp5
+  %tmp7 = load double* %tmp6, align 4
+  %tmp8 = add nsw i32 %tmp5, 1
+  %tmp9 = fadd fast double %tmp4, undef
+  %tmp10 = getelementptr inbounds float* %arg, i32 %tmp5
+  store float undef, float* %tmp10, align 4
+  %tmp11 = icmp eq i32 %tmp8, %arg1
+  br i1 %tmp11, label %bb12, label %bb3
+
+bb12:                                             ; preds = %bb3, %bb2
+  %tmp13 = phi double [ %tmp, %bb2 ], [ %tmp9, %bb3 ]
+  ret void
+}