From 35115f92e4a1e19921af4ae43db6e31f9a633738 Mon Sep 17 00:00:00 2001 From: Devang Patel Date: Fri, 15 Aug 2008 21:21:34 +0000 Subject: [PATCH] Reapply 54786. Add overflow and number of mantissa bits checks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@54821 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 119 +++++++++++++++++- .../LoopStrengthReduce/2008-08-14-ShadowIV.ll | 99 +++++++++++++++ 2 files changed, 217 insertions(+), 1 deletion(-) diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 04faa0aa508..40bbe95f7e9 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -45,6 +45,7 @@ STATISTIC(NumReduced , "Number of GEPs strength reduced"); STATISTIC(NumInserted, "Number of PHIs inserted"); STATISTIC(NumVariable, "Number of PHIs with variable strides"); STATISTIC(NumEliminated , "Number of strides eliminated"); +STATISTIC(NumShadow , "Number of Shadow IVs optimized"); namespace { @@ -177,8 +178,13 @@ private: IVStrideUse* &CondUse, const SCEVHandle* &CondStride); void OptimizeIndvars(Loop *L); + + /// OptimizeShadowIV - If IV is used in a int-to-float cast + /// inside the loop then try to eliminate the cast opeation. + void OptimizeShadowIV(Loop *L, ICmpInst *Cond, + const SCEVHandle *&CondStride); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEVHandle *&CondStride); + const SCEVHandle *&CondStride); bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); unsigned CheckForIVReuse(bool, bool, const SCEVHandle&, IVExpr&, const Type*, @@ -1689,6 +1695,115 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, return Cond; } +/// OptimizeShadowIV - If IV is used in a int-to-float cast +/// inside the loop then try to eliminate the cast opeation. +void LoopStrengthReduce::OptimizeShadowIV(Loop *L, ICmpInst *Cond, + const SCEVHandle *&CondStride) { + + const SCEVConstant *SC = dyn_cast(*CondStride); + if (!SC) return; + + SCEVHandle IterationCount = SE->getIterationCount(L); + if (isa(IterationCount)) + return; + + for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; + ++Stride) { + std::map::iterator SI = + IVUsesByStride.find(StrideOrder[Stride]); + assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); + + for (std::vector::iterator UI = SI->second.Users.begin(), + E = SI->second.Users.end(); UI != E; /* empty */) { + std::vector::iterator CandidateUI = UI; + UI++; + Instruction *ShadowUse = CandidateUI->User; + const Type *DestTy = NULL; + + /* If shadow use is a int->float cast then insert a second IV + to elminate this cast. + + for (unsigned i = 0; i < n; ++i) + foo((double)i); + + is trnasformed into + + double d = 0.0; + for (unsigned i = 0; i < n; ++i, ++d) + foo(d); + */ + UIToFPInst *UCast = dyn_cast(CandidateUI->User); + if (UCast) + DestTy = UCast->getDestTy(); + else { + SIToFPInst *SCast = dyn_cast(CandidateUI->User); + if (!SCast) continue; + DestTy = SCast->getDestTy(); + } + + PHINode *PH = dyn_cast(ShadowUse->getOperand(0)); + if (!PH) continue; + if (PH->getNumIncomingValues() != 2) continue; + + const Type *SrcTy = PH->getType(); + int Mantissa = DestTy->getFPMantissaWidth(); + if (Mantissa == -1) continue; + if ((int)TD->getTypeSizeInBits(SrcTy) > Mantissa) + continue; + + unsigned Entry, Latch; + if (PH->getIncomingBlock(0) == L->getLoopPreheader()) { + Entry = 0; + Latch = 1; + } else { + Entry = 1; + Latch = 0; + } + + ConstantInt *Init = dyn_cast(PH->getIncomingValue(Entry)); + if (!Init) continue; + ConstantFP *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); + + BinaryOperator *Incr = + dyn_cast(PH->getIncomingValue(Latch)); + if (!Incr) continue; + if (Incr->getOpcode() != Instruction::Add + && Incr->getOpcode() != Instruction::Sub) + continue; + + /* Initialize new IV, double d = 0.0 in above example. */ + ConstantInt *C = NULL; + if (Incr->getOperand(0) == PH) + C = dyn_cast(Incr->getOperand(1)); + else if (Incr->getOperand(1) == PH) + C = dyn_cast(Incr->getOperand(0)); + else + continue; + + if (!C) continue; + + /* Add new PHINode. */ + PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); + + /* create new icnrement. '++d' in above example. */ + ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue()); + BinaryOperator *NewIncr = + BinaryOperator::Create(Incr->getOpcode(), + NewPH, CFP, "IV.S.next.", Incr); + + NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry)); + NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch)); + + /* Remove cast operation */ + ShadowUse->replaceAllUsesWith(NewPH); + ShadowUse->eraseFromParent(); + SI->second.Users.erase(CandidateUI); + NumShadow++; + break; + } + } +} + // OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar // uses in the loop, look to see if we can eliminate some, in favor of using // common indvars for the different uses. @@ -1716,6 +1831,8 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) { if (!FindIVUserForCond(Cond, CondUse, CondStride)) return; // setcc doesn't use the IV. + OptimizeShadowIV(L, Cond, CondStride); + // If possible, change stride and operands of the compare instruction to // eliminate one stride. Cond = ChangeCompareStride(L, Cond, CondUse, CondStride); diff --git a/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll b/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll index e69de29bb2d..2377589b0eb 100644 --- a/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll +++ b/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll @@ -0,0 +1,99 @@ +; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep "phi double" | count 1 + +define void @foobar(i32 %n) nounwind { +entry: + icmp eq i32 %n, 0 ; :0 [#uses=2] + br i1 %0, label %return, label %bb.nph + +bb.nph: ; preds = %entry + %umax = select i1 %0, i32 1, i32 %n ; [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph + %i.03 = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; [#uses=3] + tail call void @bar( i32 %i.03 ) nounwind + uitofp i32 %i.03 to double ; :1 [#uses=1] + tail call void @foo( double %1 ) nounwind + %indvar.next = add i32 %i.03, 1 ; [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %umax ; [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +; Unable to eliminate cast because the mantissa bits for double are not enough +; to hold all of i64 IV bits. +define void @foobar2(i64 %n) nounwind { +entry: + icmp eq i64 %n, 0 ; :0 [#uses=2] + br i1 %0, label %return, label %bb.nph + +bb.nph: ; preds = %entry + %umax = select i1 %0, i64 1, i64 %n ; [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph + %i.03 = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; [#uses=3] + trunc i64 %i.03 to i32 ; :1 [#uses=1] + tail call void @bar( i32 %1 ) nounwind + uitofp i64 %i.03 to double ; :2 [#uses=1] + tail call void @foo( double %2 ) nounwind + %indvar.next = add i64 %i.03, 1 ; [#uses=2] + %exitcond = icmp eq i64 %indvar.next, %umax ; [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +; Unable to eliminate cast due to potentional overflow. +define void @foobar3() nounwind { +entry: + tail call i32 (...)* @nn( ) nounwind ; :0 [#uses=1] + icmp eq i32 %0, 0 ; :1 [#uses=1] + br i1 %1, label %return, label %bb + +bb: ; preds = %bb, %entry + %i.03 = phi i32 [ 0, %entry ], [ %3, %bb ] ; [#uses=3] + tail call void @bar( i32 %i.03 ) nounwind + uitofp i32 %i.03 to double ; :2 [#uses=1] + tail call void @foo( double %2 ) nounwind + add i32 %i.03, 1 ; :3 [#uses=2] + tail call i32 (...)* @nn( ) nounwind ; :4 [#uses=1] + icmp ugt i32 %4, %3 ; :5 [#uses=1] + br i1 %5, label %bb, label %return + +return: ; preds = %bb, %entry + ret void +} + +; Unable to eliminate cast due to overflow. +define void @foobar4() nounwind { +entry: + br label %bb.nph + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb, %bb.nph + %i.03 = phi i8 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; [#uses=3] + %tmp2 = sext i8 %i.03 to i32 ; :0 [#uses=1] + tail call void @bar( i32 %tmp2 ) nounwind + %tmp3 = uitofp i8 %i.03 to double ; :1 [#uses=1] + tail call void @foo( double %tmp3 ) nounwind + %indvar.next = add i8 %i.03, 1 ; [#uses=2] + %tmp = sext i8 %indvar.next to i32 + %exitcond = icmp eq i32 %tmp, 32767 ; [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +declare void @bar(i32) + +declare void @foo(double) + +declare i32 @nn(...) +