Insert the reduction start value into the first bypass block to preserve domination.

Fixes PR15344.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176701 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Benjamin Kramer 2013-03-08 16:58:37 +00:00
parent 7893d29c62
commit f22d9cfa6d
2 changed files with 36 additions and 1 deletions

View File

@ -1643,7 +1643,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// To do so, we need to generate the 'identity' vector and overide // To do so, we need to generate the 'identity' vector and overide
// one of the elements with the incoming scalar reduction. We need // one of the elements with the incoming scalar reduction. We need
// to do it in the vector-loop preheader. // to do it in the vector-loop preheader.
Builder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator()); Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
// This is the vector-clone of the value that leaves the loop. // This is the vector-clone of the value that leaves the loop.
VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr); VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);

View File

@ -0,0 +1,35 @@
; Regression test for the loop vectorizer: the module is piped through opt with
; -loop-vectorize and the textual IR output is verified by FileCheck against
; the directives embedded in this file.
; RUN: opt -S -loop-vectorize < %s | FileCheck %s
; The layout string and triple pin the test to i386 Darwin with 32-bit
; pointers (p:32:32:32), which is why the induction variable below is an i32.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
target triple = "i386-apple-darwin"
; PR15344
; A floating-point reduction whose start value (%tmp) is loaded in bb2, the
; block immediately preceding the loop, rather than in the entry block.
; The directives below require that the insertelement seeding the vector
; reduction with %tmp shows up after a "preheader" label and before
; "vector.memcheck" in the output, i.e. in the first of the bypass blocks,
; so that it dominates every later use.
define void @test1(float* nocapture %arg, i32 %arg1) nounwind {
; CHECK: @test1
; CHECK: preheader
; CHECK: insertelement <2 x double> zeroinitializer, double %tmp, i32 0
; CHECK: vector.memcheck
bb:
br label %bb2
bb2: ; preds = %bb
; Start value of the reduction; defined here, not in the entry block.
%tmp = load double* null, align 8
; Loop guard: either enter the loop (bb3) or jump straight to the exit (bb12).
br i1 undef, label %bb3, label %bb12
bb3: ; preds = %bb3, %bb2
; Reduction accumulator: starts at %tmp, updated by the fadd below.
%tmp4 = phi double [ %tmp9, %bb3 ], [ %tmp, %bb2 ]
; Induction variable: starts at 0 and is incremented by 1 each iteration.
%tmp5 = phi i32 [ %tmp8, %bb3 ], [ 0, %bb2 ]
%tmp6 = getelementptr inbounds [16 x double]* undef, i32 0, i32 %tmp5
%tmp7 = load double* %tmp6, align 4
%tmp8 = add nsw i32 %tmp5, 1
; Reduction step feeding the accumulator phi and the exit phi in bb12.
%tmp9 = fadd fast double %tmp4, undef
; Store through the pointer argument, indexed by the induction variable.
; NOTE(review): presumably this access is what makes the vectorizer emit the
; vector.memcheck block expected above - confirm against the vectorizer.
%tmp10 = getelementptr inbounds float* %arg, i32 %tmp5
store float undef, float* %tmp10, align 4
; Leave the loop once the incremented induction variable equals %arg1.
%tmp11 = icmp eq i32 %tmp8, %arg1
br i1 %tmp11, label %bb12, label %bb3
bb12: ; preds = %bb3, %bb2
; Use of the reduction outside the loop: %tmp when the loop is skipped,
; otherwise the last value produced by the fadd.
%tmp13 = phi double [ %tmp, %bb2 ], [ %tmp9, %bb3 ]
ret void
}