diff --git a/include/llvm/Transforms/Utils/SimplifyIndVar.h b/include/llvm/Transforms/Utils/SimplifyIndVar.h
index 7e97e218fb0..3c3de467c45 100644
--- a/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -22,6 +22,7 @@
 namespace llvm {
 
 class CastInst;
+class DominatorTree;
 class IVUsers;
 class Loop;
 class LPPassManager;
@@ -31,9 +32,25 @@ class ScalarEvolution;
 /// Interface for visiting interesting IV users that are recognized but not
 /// simplified by this utility.
 class IVVisitor {
+protected:
+  const DominatorTree *DT;
+  bool ShouldSplitOverflowIntrinsics;
+
   virtual void anchor();
 public:
+  IVVisitor(): DT(NULL), ShouldSplitOverflowIntrinsics(false) {}
   virtual ~IVVisitor() {}
+
+  const DominatorTree *getDomTree() const { return DT; }
+
+  bool shouldSplitOverflowInstrinsics() const {
+    return ShouldSplitOverflowIntrinsics;
+  }
+  void setSplitOverflowIntrinsics() {
+    ShouldSplitOverflowIntrinsics = true;
+    assert(DT && "Splitting overflow intrinsics requires a DomTree.");
+  }
+
   virtual void visitCast(CastInst *Cast) = 0;
 };
 
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 235aaaa6f80..c291f68bd63 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -63,6 +63,9 @@ static cl::opt<bool> VerifyIndvars(
   "verify-indvars", cl::Hidden,
   cl::desc("Verify the ScalarEvolution result after running indvars"));
 
+static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
+  cl::desc("Reduce live induction variables."));
+
 namespace {
   class IndVarSimplify : public LoopPass {
     LoopInfo *LI;
@@ -643,8 +646,11 @@ namespace {
     WideIVInfo WI;
 
     WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
-                  const DataLayout *TData) :
-      SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
+                  const DataLayout *TData, const DominatorTree *DTree):
+      SE(SCEV), TD(TData) {
+      DT = DTree;
+      WI.NarrowIV = NarrowIV;
+    }
 
     // Implement the interface used by simplifyUsersOfIV.
     virtual void visitCast(CastInst *Cast);
@@ -1114,7 +1120,9 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
       PHINode *CurrIV = LoopPhis.pop_back_val();
 
       // Information about sign/zero extensions of CurrIV.
-      WideIVVisitor WIV(CurrIV, SE, TD);
+      WideIVVisitor WIV(CurrIV, SE, TD, DT);
+      if (ReduceLiveIVs)
+        WIV.setSplitOverflowIntrinsics();
 
       Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
 
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index bf3442aeaaa..d1f6c5c62a2 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -18,12 +18,16 @@
 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/IVUsers.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -75,6 +79,9 @@ namespace {
     void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
     void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
                               bool IsSigned);
+
+    Instruction *splitOverflowIntrinsic(Instruction *IVUser,
+                                        const DominatorTree *DT);
   };
 }
 
@@ -263,6 +270,71 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
   return true;
 }
 
+/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow
+/// analysis and optimization.
+///
+/// \return A new value representing the non-overflowing add if possible,
+/// otherwise return the original value.
+Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
+                                                    const DominatorTree *DT) {
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser);
+  if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow)
+    return IVUser;
+
+  // Find a branch guarded by the overflow check.
+  BranchInst *Branch = 0;
+  Instruction *AddVal = 0;
+  for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+       UI != E; ++UI) {
+    if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(*UI)) {
+      if (ExtractInst->getNumIndices() != 1)
+        continue;
+      if (ExtractInst->getIndices()[0] == 0)
+        AddVal = ExtractInst;
+      else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse())
+        Branch = dyn_cast<BranchInst>(ExtractInst->use_back());
+    }
+  }
+  if (!AddVal || !Branch)
+    return IVUser;
+
+  BasicBlock *ContinueBB = Branch->getSuccessor(1);
+  if (llvm::next(pred_begin(ContinueBB)) != pred_end(ContinueBB))
+    return IVUser;
+
+  // Check if all users of the add are provably NSW.
+  bool AllNSW = true;
+  for (Value::use_iterator UI = AddVal->use_begin(), E = AddVal->use_end();
+       UI != E; ++UI) {
+    if (Instruction *UseInst = dyn_cast<Instruction>(*UI)) {
+      BasicBlock *UseBB = UseInst->getParent();
+      if (PHINode *PHI = dyn_cast<PHINode>(UseInst))
+        UseBB = PHI->getIncomingBlock(UI);
+      if (!DT->dominates(ContinueBB, UseBB)) {
+        AllNSW = false;
+        break;
+      }
+    }
+  }
+  if (!AllNSW)
+    return IVUser;
+
+  // Go for it...
+  IRBuilder<> Builder(IVUser);
+  Instruction *AddInst = dyn_cast<Instruction>(
+    Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1)));
+
+  // The caller expects the new add to have the same form as the intrinsic. The
+  // IV operand position must be the same.
+  assert((AddInst->getOpcode() == Instruction::Add &&
+          AddInst->getOperand(0) == II->getOperand(0)) &&
+         "Bad add instruction created from overflow intrinsic.");
+
+  AddVal->replaceAllUsesWith(AddInst);
+  DeadInsts.push_back(AddVal);
+  return AddInst;
+}
+
 /// pushIVUsers - Add all uses of Def to the current IV's worklist.
/// static void pushIVUsers( @@ -334,8 +406,16 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { while (!SimpleIVUsers.empty()) { std::pair UseOper = SimpleIVUsers.pop_back_val(); + Instruction *UseInst = UseOper.first; + // Bypass back edges to avoid extra work. - if (UseOper.first == CurrIV) continue; + if (UseInst == CurrIV) continue; + + if (V && V->shouldSplitOverflowInstrinsics()) { + UseInst = splitOverflowIntrinsic(UseInst, V->getDomTree()); + if (!UseInst) + continue; + } Instruction *IVOperand = UseOper.second; for (unsigned N = 0; IVOperand; ++N) { diff --git a/test/Transforms/IndVarSimplify/overflowcheck.ll b/test/Transforms/IndVarSimplify/overflowcheck.ll new file mode 100644 index 00000000000..2603f363ab6 --- /dev/null +++ b/test/Transforms/IndVarSimplify/overflowcheck.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -indvars -liv-reduce -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx" + +; CHECK-LABEL: @addwithoverflow +; CHECK-LABEL: loop1: +; CHECK-NOT: zext +; CHECK: add nsw +; CHECK: @llvm.sadd.with.overflow +; CHECK-LABEL: loop2: +; CHECK-NOT: extractvalue +; CHECK: add nuw nsw +; CHECK: @llvm.sadd.with.overflow +; CHECK-LABEL: loop3: +; CHECK-NOT: extractvalue +; CHECK: ret +define i64 @addwithoverflow(i32 %n, i64* %a) { +entry: + br label %loop0 + +loop0: + %i = phi i32 [ 0, %entry ], [ %i1val, %loop3 ] + %s = phi i32 [ 0, %entry ], [ %addsval, %loop3 ] + %bc = icmp ult i32 %i, %n + br i1 %bc, label %loop1, label %exit + +loop1: + %zxt = zext i32 %i to i64 + %ofs = shl nuw nsw i64 %zxt, 3 + %gep = getelementptr i64* %a, i64 %zxt + %v = load i64* %gep, align 8 + %truncv = trunc i64 %v to i32 + %adds = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %s, i32 %truncv) + %ovflows = extractvalue { i32, i1 } %adds, 1 + br i1 %ovflows, label %exit, label %loop2 + 
+loop2:
+  %addsval = extractvalue { i32, i1 } %adds, 0
+  %i1 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i, i32 1)
+  %i1check = extractvalue { i32, i1 } %i1, 1
+  br i1 %i1check, label %exit, label %loop3
+
+loop3:
+  %i1val = extractvalue { i32, i1 } %i1, 0
+  %test = icmp slt i32 %i1val, %n
+  br i1 %test, label %return, label %loop0
+
+return:
+  %ret = zext i32 %addsval to i64
+  ret i64 %ret
+
+exit:
+  unreachable
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)