From 79f0bfcc20135844d260a20c359222cd90481f78 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Wed, 16 Nov 2011 00:52:40 +0000 Subject: [PATCH] Fix SCEV overly optimistic back edge taken count for multi-exit loops. Fixes PR11375: Different results for 'clang++ huh.cpp'... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144746 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 31 +++++++++----- .../IndVarSimplify/2011-11-15-multiexit.ll | 40 +++++++++++++++++++ .../IndVarSimplify/loop_evaluate10.ll | 8 +++- .../IndVarSimplify/loop_evaluate9.ll | 7 +++- 4 files changed, 75 insertions(+), 11 deletions(-) create mode 100644 test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index ac00259c5bb..77defa82e71 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -4153,13 +4153,19 @@ void ScalarEvolution::forgetValue(Value *V) { } /// getExact - Get the exact loop backedge taken count considering all loop -/// exits. If all exits are computable, this is the minimum computed count. +/// exits. A computable result can only be returned for loops with a single exit. +/// Returning the minimum taken count among all exits is incorrect because one +/// of the loop's exit limits may have been skipped. HowFarToZero assumes that +/// the limit of each loop test is never skipped. This is a valid assumption as +/// long as the loop exits via that test. For precise results, it is the +/// caller's responsibility to specify the relevant loop exit using +/// getExact(ExitingBlock, SE). const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { // If any exits were not computable, the loop is not computable. if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute(); - // We need at least one computable exit. + // We need exactly one computable exit. 
if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute(); assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); @@ -4171,8 +4177,8 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { if (!BECount) BECount = ENT->ExactNotTaken; - else - BECount = SE->getUMinFromMismatchedTypes(BECount, ENT->ExactNotTaken); + else if (BECount != ENT->ExactNotTaken) + return SE->getCouldNotCompute(); } assert(BECount && "Invalid not taken count for loop exit"); return BECount; @@ -4253,8 +4259,15 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { if (MaxBECount == getCouldNotCompute()) MaxBECount = EL.Max; - else if (EL.Max != getCouldNotCompute()) - MaxBECount = getUMinFromMismatchedTypes(MaxBECount, EL.Max); + else if (EL.Max != getCouldNotCompute()) { + // We cannot take the "min" MaxBECount, because non-unit stride loops may + // skip some loop tests. Taking the max over the exits is sufficiently + // conservative. TODO: We could do better taking into consideration + // that (1) the loop has unit stride (2) the last loop test is + // less-than/greater-than (3) any loop test is less-than/greater-than AND + // falls-through some constant times less than the other tests. + MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max); + } } return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); @@ -4920,7 +4933,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // the loop symbolically to determine when the condition gets a value of // "ExitWhen". - unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. + unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ ConstantInt *CondVal = dyn_cast_or_null(EvaluateExpression(Cond, L, @@ -5507,10 +5520,10 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // behavior. 
Loops must exhibit defined behavior until a wrapped value is // actually used. So the trip count computed by udiv could be smaller than the // number of well-defined iterations. - if (AddRec->getNoWrapFlags(SCEV::FlagNW)) + if (AddRec->getNoWrapFlags(SCEV::FlagNW)) { // FIXME: We really want an "isexact" bit for udiv. return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - + } // Then, try to solve the above equation provided that Start is constant. if (const SCEVConstant *StartC = dyn_cast(Start)) return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), diff --git a/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll b/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll new file mode 100644 index 00000000000..c74d04e6a5d --- /dev/null +++ b/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -indvars -S | FileCheck %s +; +; Prior to the fix for PR11375, indvars would replace %firstIV with a +; loop-invariant gep computed in the preheader. This was incorrect +; because it was based on the minimum "ExitNotTaken" count. If the +; final loop test is skipped (odd number of elements) then the early +; exit would be taken and the loop invariant value would be incorrect. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin" + +; CHECK: if.end: +; CHECK: phi i32* [ %first.lcssa, %early.exit ] +define i32 @test(i32* %first, i32* %last) uwtable ssp { +entry: + br i1 undef, label %if.end, label %if.then + +if.then: ; preds = %entry + br i1 undef, label %if.end, label %do.body + +do.body: ; preds = %if.else, %if.then + %firstIV = phi i32* [ %incdec.ptr2, %if.else ], [ %first, %if.then ] + %incdec.ptr1 = getelementptr inbounds i32* %firstIV, i64 1 + %cmp1 = icmp eq i32* %incdec.ptr1, %last + br i1 %cmp1, label %early.exit, label %if.else + +if.else: ; preds = %do.body + %incdec.ptr2 = getelementptr inbounds i32* %firstIV, i64 2 + %cmp2 = icmp eq i32* %incdec.ptr2, %last + br i1 %cmp2, label %if.end, label %do.body + +early.exit: + %first.lcssa = phi i32* [ %firstIV, %do.body ] + br label %if.end + +if.end: + %tmp = phi i32* [ %first.lcssa, %early.exit ], [ %first, %if.then ], [ %first, %entry ], [ undef, %if.else ] + %val = load i32* %tmp + ret i32 %val +} diff --git a/test/Transforms/IndVarSimplify/loop_evaluate10.ll b/test/Transforms/IndVarSimplify/loop_evaluate10.ll index 269478a5ed0..c3619f640b3 100644 --- a/test/Transforms/IndVarSimplify/loop_evaluate10.ll +++ b/test/Transforms/IndVarSimplify/loop_evaluate10.ll @@ -1,8 +1,14 @@ ; RUN: opt < %s -indvars -S \ ; RUN: | grep {%b.1 = phi i32 \\\[ 2, %bb \\\], \\\[ 1, %bb2 \\\]} - +; ; This loop has multiple exits, and the value of %b1 depends on which ; exit is taken. Indvars should correctly compute the exit values. +; +; XFAIL: * +; Indvars does not currently replace loop invariant values unless all +; loop exits have the same exit value. We could handle some cases, +; such as this, by making getSCEVAtScope() sensitive to a particular +; loop exit. See PR11388. 
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-pc-linux-gnu" diff --git a/test/Transforms/IndVarSimplify/loop_evaluate9.ll b/test/Transforms/IndVarSimplify/loop_evaluate9.ll index 8184a73f89e..9f3bcaf21be 100644 --- a/test/Transforms/IndVarSimplify/loop_evaluate9.ll +++ b/test/Transforms/IndVarSimplify/loop_evaluate9.ll @@ -2,8 +2,13 @@ ; RUN: grep {\[%\]tmp7 = icmp eq i8 -28, -28} %t ; RUN: grep {\[%\]tmp8 = icmp eq i8 63, 63} %t ; PR4477 - ; Indvars should compute the exit values in loop. +; +; XFAIL: * +; Indvars does not currently replace loop invariant values unless all +; loop exits have the same exit value. We could handle some cases, +; such as this, by making getSCEVAtScope() sensitive to a particular +; loop exit. See PR11388. target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" target triple = "i386-pc-linux-gnu"