diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 79f80f37eea..874db9ff152 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1537,6 +1537,15 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop); assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count"); + // The exit count might have the type of i64 while the phi is i32. This can + // happen if we have an induction variable that is sign extended before the + // compare. The only way that we get a backedge taken count is that the + // induction variable was signed and as such will not overflow. In such a case + // truncation is legal. + if (ExitCount->getType()->getPrimitiveSizeInBits() > + IdxTy->getPrimitiveSizeInBits()) + ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy); + ExitCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy); // Get the total trip count from the count by adding 1. ExitCount = SE->getAddExpr(ExitCount, diff --git a/test/Transforms/LoopVectorize/X86/tripcount.ll b/test/Transforms/LoopVectorize/X86/tripcount.ll new file mode 100644 index 00000000000..6b38bacf888 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/tripcount.ll @@ -0,0 +1,39 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -mcpu=prescott < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" +target triple = "i386-unknown-freebsd11.0" + +@big = external global [0 x i32] + +; PR18049 +; We need to truncate the exit count to i32. This is legal because the +; arithmetic is signed (%inc is nsw). 
+
+; CHECK-LABEL: tripcount
+; CHECK: trunc i64 %count to i32
+
+define void @tripcount(i64 %count) {  ; the loop bound %count is i64 while the loop counter below is i32
+entry:
+  %cmp6 = icmp sgt i64 %count, 0  ; signed test: is the bound greater than zero?
+  br i1 %cmp6, label %for.body.preheader, label %for.end  ; bypass the loop when %count <= 0
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]  ; i32 counter: 0 on entry, %inc on the backedge
+  %arrayidx = getelementptr inbounds [0 x i32]* @big, i32 0, i32 %i.07  ; address of @big[%i.07]
+  %0 = load i32* %arrayidx, align 4
+  %neg = xor i32 %0, -1  ; xor with all-ones flips every bit of the loaded element
+  store i32 %neg, i32* %arrayidx, align 4  ; write the inverted value back to the same slot
+  %inc = add nsw i32 %i.07, 1  ; nsw: the increment is marked as not wrapping in signed arithmetic
+  %conv = sext i32 %inc to i64  ; sign-extend the i32 counter so it can be compared with the i64 bound
+  %cmp = icmp slt i64 %conv, %count  ; signed compare performed in i64, wider than the counter itself
+  br i1 %cmp, label %for.body, label %for.end.loopexit  ; keep looping while the widened counter < %count
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}