diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index a09b3dc5f85..9fdbf4aa1ba 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1461,12 +1461,26 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // Add one to the backedge-taken count to get the trip count. const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One); + if (IterationCount != SE.getSCEV(Sel)) return Cond; - // Check for a max calculation that matches the pattern. - if (!isa(IterationCount) && !isa(IterationCount)) + // Check for a max calculation that matches the pattern. There's no check + // for ICMP_ULE here because the comparison would be with zero, which + // isn't interesting. + CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; + const SCEVNAryExpr *Max = 0; + if (const SCEVSMaxExpr *S = dyn_cast(BackedgeTakenCount)) { + Pred = ICmpInst::ICMP_SLE; + Max = S; + } else if (const SCEVSMaxExpr *S = dyn_cast(IterationCount)) { + Pred = ICmpInst::ICMP_SLT; + Max = S; + } else if (const SCEVUMaxExpr *U = dyn_cast(IterationCount)) { + Pred = ICmpInst::ICMP_ULT; + Max = U; + } else { + // No match; bail. return Cond; - const SCEVNAryExpr *Max = cast(IterationCount); - if (Max != SE.getSCEV(Sel)) return Cond; + } // To handle a max with more than two operands, this optimization would // require additional checking and setup. @@ -1475,7 +1489,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { const SCEV *MaxLHS = Max->getOperand(0); const SCEV *MaxRHS = Max->getOperand(1); - if (!MaxLHS || MaxLHS != One) return Cond; + + // ScalarEvolution canonicalizes constants to the left. For < and >, look + // for a comparison with 1. For <= and >=, a comparison with zero. + if (!MaxLHS || + (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One))) + return Cond; + // Check the relevant induction variable for conformance to // the pattern. const SCEV *IV = SE.getSCEV(Cond->getOperand(0)); @@ -1491,16 +1511,29 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // Check the right operand of the select, and remember it, as it will // be used in the new comparison instruction. Value *NewRHS = 0; - if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) + if (ICmpInst::isTrueWhenEqual(Pred)) { + // Look for n+1, and grab n. + if (AddOperator *BO = dyn_cast(Sel->getOperand(1))) + if (isa(BO->getOperand(1)) && + cast(BO->getOperand(1))->isOne() && + SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); + if (AddOperator *BO = dyn_cast(Sel->getOperand(2))) + if (isa(BO->getOperand(1)) && + cast(BO->getOperand(1))->isOne() && + SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); + if (!NewRHS) + return Cond; + } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) NewRHS = Sel->getOperand(1); else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS) NewRHS = Sel->getOperand(2); - if (!NewRHS) return Cond; + else + llvm_unreachable("Max doesn't match expected pattern!"); // Determine the new comparison opcode. It may be signed or unsigned, // and the original comparison may be either equality or inequality. - CmpInst::Predicate Pred = - isa(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; if (Cond->getPredicate() == CmpInst::ICMP_EQ) Pred = CmpInst::getInversePredicate(Pred); diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll new file mode 100644 index 00000000000..bf8bfa28daf --- /dev/null +++ b/test/CodeGen/X86/optimize-max-3.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; LSR's OptimizeMax should eliminate the select (max). + +; CHECK: foo: +; CHECK-NOT: cmov +; CHECK: jle + +define void @foo(i64 %n, double* nocapture %p) nounwind { +entry: + %cmp6 = icmp slt i64 %n, 0 ; [#uses=1] + br i1 %cmp6, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + %tmp = icmp sgt i64 %n, 0 ; [#uses=1] + %n.op = add i64 %n, 1 ; [#uses=1] + %tmp1 = select i1 %tmp, i64 %n.op, i64 1 ; [#uses=1] + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %i = phi i64 [ %i.next, %for.body ], [ 0, %for.body.preheader ] ; [#uses=2] + %arrayidx = getelementptr double* %p, i64 %i ; [#uses=2] + %t4 = load double* %arrayidx ; [#uses=1] + %mul = fmul double %t4, 2.200000e+00 ; [#uses=1] + store double %mul, double* %arrayidx + %i.next = add nsw i64 %i, 1 ; [#uses=2] + %exitcond = icmp eq i64 %i.next, %tmp1 ; [#uses=1] + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +}