From 16404cc817e8b8d3fbfbc9051394eb59d7c5ffb4 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 12 Jul 2013 22:08:48 +0000 Subject: [PATCH] LFTR improvement to avoid truncation. This is a reimplementation of the patch originally in r186107. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186215 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/IndVarSimplify.cpp | 38 +++++++++++++--- .../IndVarSimplify/lftr-extend-const.ll | 44 +++++++++++++++++++ 2 files changed, 76 insertions(+), 6 deletions(-) create mode 100644 test/Transforms/IndVarSimplify/lftr-extend-const.ll diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index f0ac637968c..d51e034861d 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1590,15 +1590,41 @@ LinearFunctionTestReplace(Loop *L, << " RHS:\t" << *ExitCnt << "\n" << " IVCount:\t" << *IVCount << "\n"); + IRBuilder<> Builder(BI); + // LFTR can ignore IV overflow and truncate to the width of // BECount. This avoids materializing the add(zext(add)) expression. - IRBuilder<> Builder(BI); - if (SE->getTypeSizeInBits(CmpIndVar->getType()) - > SE->getTypeSizeInBits(ExitCnt->getType())) { - CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), - "lftr.wideiv"); - } + unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); + unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); + if (CmpIndVarSize > ExitCntSize) { + const SCEVAddRecExpr *AR = cast(SE->getSCEV(IndVar)); + const SCEV *ARStart = AR->getStart(); + const SCEV *ARStep = AR->getStepRecurrence(*SE); + // For constant IVCount, avoid truncation. + if (isa(ARStart) && isa(IVCount)) { + const APInt &Start = cast(ARStart)->getValue()->getValue(); + APInt Count = cast(IVCount)->getValue()->getValue(); + // Note that the post-inc value of BackedgeTakenCount may have overflowed + // above such that IVCount is now zero. 
+ if (IVCount != BackedgeTakenCount && Count == 0) { + Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize); + ++Count; + } + else + Count = Count.zext(CmpIndVarSize); + APInt NewLimit; + if (cast(ARStep)->getValue()->isNegative()) + NewLimit = Start - Count; + else + NewLimit = Start + Count; + ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit); + DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n"); + } else { + CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), + "lftr.wideiv"); + } + } Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); Value *OrigCond = BI->getCondition(); // It's tempting to use replaceAllUsesWith here to fully replace the old diff --git a/test/Transforms/IndVarSimplify/lftr-extend-const.ll b/test/Transforms/IndVarSimplify/lftr-extend-const.ll new file mode 100644 index 00000000000..18e1507764b --- /dev/null +++ b/test/Transforms/IndVarSimplify/lftr-extend-const.ll @@ -0,0 +1,44 @@ +;RUN: opt -S %s -indvars | FileCheck %s + +; CHECK-LABEL: @foo +; CHECK-NOT: %lftr.wideiv = trunc i32 %indvars.iv.next to i16 +; CHECK: %exitcond = icmp ne i32 %indvars.iv.next, 512 +define void @foo() #0 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.01 = phi i16 [ 0, %entry ], [ %inc, %for.body ] + %conv2 = sext i16 %i.01 to i32 + call void @bar(i32 %conv2) #1 + %inc = add i16 %i.01, 1 + %cmp = icmp slt i16 %inc, 512 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + +; Check that post-incrementing the backedge taken count does not overflow. 
+; CHECK-LABEL: @postinc +; CHECK: icmp eq i32 %indvars.iv.next, 256 +define i32 @postinc() #0 { +entry: + br label %do.body + +do.body: ; preds = %do.body, %entry + %first.0 = phi i8 [ 0, %entry ], [ %inc, %do.body ] + %conv = zext i8 %first.0 to i32 + call void @bar(i32 %conv) #1 + %inc = add i8 %first.0, 1 + %cmp = icmp eq i8 %first.0, -1 + br i1 %cmp, label %do.end, label %do.body + +do.end: ; preds = %do.body + ret i32 0 +} + +declare void @bar(i32) + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind }