Re-apply r73718, now that the fix in r73787 is in, and add a

hand-crafted testcase which demonstrates the bug that was exposed
in 254.gap.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@73793 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dan Gohman 2009-06-19 23:23:27 +00:00
parent 5090f19f09
commit 2781f30eac
4 changed files with 66 additions and 32 deletions

View File

@ -143,10 +143,10 @@ namespace {
/// inside the loop then try to eliminate the cast opeation.
void OptimizeShadowIV(Loop *L);
/// OptimizeSMax - Rewrite the loop's terminating condition
/// if it uses an smax computation.
ICmpInst *OptimizeSMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse);
/// OptimizeMax - Rewrite the loop's terminating condition
/// if it uses a max computation.
ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse);
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
const SCEVHandle *&CondStride);
@ -2044,8 +2044,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
return Cond;
}
/// OptimizeSMax - Rewrite the loop's terminating condition if it uses
/// an smax computation.
/// OptimizeMax - Rewrite the loop's terminating condition if it uses
/// a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
@ -2055,10 +2055,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// p[i] = 0.0;
/// } while (++i < n);
///
/// where the comparison is signed, the trip count isn't just 'n', because
/// 'n' could be negative. And unfortunately this can come up even for loops
/// where the user didn't use a C do-while loop. For example, seemingly
/// well-behaved top-test loops will commonly be lowered like this:
/// the trip count isn't just 'n', because 'n' might not be positive. And
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
//
/// if (n > 0) {
/// i = 0;
@ -2071,14 +2071,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// signed-max expression, which allows it to give the loop a canonical
/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
/// i = 0;
/// smax = n < 1 ? 1 : n;
/// max = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
/// } while (++i != smax);
/// } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
@ -2094,8 +2094,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse) {
ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
@ -2113,17 +2113,19 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
SCEVHandle IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
// Check for a max calculation that matches the pattern.
const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount);
if (!SMax || SMax != SE->getSCEV(Sel)) return Cond;
if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
return Cond;
const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
if (Max != SE->getSCEV(Sel)) return Cond;
// Two handle a max with more than two operands, this optimization would
// require additional checking and setup.
if (SMax->getNumOperands() != 2)
if (Max->getNumOperands() != 2)
return Cond;
SCEVHandle SMaxLHS = SMax->getOperand(0);
SCEVHandle SMaxRHS = SMax->getOperand(1);
if (!SMaxLHS || SMaxLHS != One) return Cond;
SCEVHandle MaxLHS = Max->getOperand(0);
SCEVHandle MaxRHS = Max->getOperand(1);
if (!MaxLHS || MaxLHS != One) return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
@ -2140,19 +2142,23 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS)
if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS)
else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
if (!NewRHS) return Cond;
// Determine the new comparison opcode. It may be signed or unsigned,
// and the original comparison may be either equality or inequality.
CmpInst::Predicate Pred =
isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
if (Cond->getPredicate() == CmpInst::ICMP_EQ)
Pred = CmpInst::getInversePredicate(Pred);
// Ok, everything looks ok to change the condition into an SLT or SGE and
// delete the max calculation.
ICmpInst *NewCond =
new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ?
CmpInst::ICMP_SLT :
CmpInst::ICMP_SGE,
Cond->getOperand(0), NewRHS, "scmp", Cond);
new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond);
// Delete the max calculation instructions.
Cond->replaceAllUsesWith(NewCond);
@ -2365,10 +2371,10 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
StrideNoReuse.insert(*CondStride);
}
// If the trip count is computed in terms of an smax (due to ScalarEvolution
// If the trip count is computed in terms of a max (due to ScalarEvolution
// being unable to find a sufficient guard, for example), change the loop
// comparison to use SLT instead of NE.
Cond = OptimizeSMax(L, Cond, CondUse);
// comparison to use SLT or ULT instead of NE.
Cond = OptimizeMax(L, Cond, CondUse);
// If possible, change stride and operands of the compare instruction to
// eliminate one stride.

View File

@ -1,5 +1,4 @@
; RUN: llvm-as < %s | llc -march=x86 | not grep cmov
; XFAIL: *
; LSR should be able to eliminate the max computations by
; making the loops use slt/ult comparisons instead of ne comparisons.

View File

@ -1,5 +1,4 @@
; RUN: llvm-as < %s | llc -march=x86-64 | not grep cmov
; XFAIL: *
; LSR should be able to eliminate both smax and umax expressions
; in loop trip counts.

View File

@ -0,0 +1,30 @@
; RUN: llvm-as < %s | llc -march=x86-64 > %t
; RUN: grep cmov %t | count 2
; RUN: grep jne %t | count 1
; LSR's OptimizeMax function shouldn't try to eliminate this max, because
; it has three operands.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
define void @foo(double* nocapture %p, i64 %x, i64 %y) nounwind {
entry:
%tmp = icmp eq i64 %y, 0 ; <i1> [#uses=1]
%umax = select i1 %tmp, i64 1, i64 %y ; <i64> [#uses=2]
%tmp8 = icmp ugt i64 %umax, %x ; <i1> [#uses=1]
%umax9 = select i1 %tmp8, i64 %umax, i64 %x ; <i64> [#uses=1]
br label %bb4
bb4: ; preds = %bb4, %entry
%i.07 = phi i64 [ 0, %entry ], [ %2, %bb4 ] ; <i64> [#uses=2]
%scevgep = getelementptr double* %p, i64 %i.07 ; <double*> [#uses=2]
%0 = load double* %scevgep, align 8 ; <double> [#uses=1]
%1 = fmul double %0, 2.000000e+00 ; <double> [#uses=1]
store double %1, double* %scevgep, align 8
%2 = add i64 %i.07, 1 ; <i64> [#uses=2]
%exitcond = icmp eq i64 %2, %umax9 ; <i1> [#uses=1]
br i1 %exitcond, label %return, label %bb4
return: ; preds = %bb4
ret void
}