diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 98a68f6461b..dd423fa3b17 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -19,6 +19,7 @@ namespace llvm { class User; class BasicBlock; +class BranchInst; class Instruction; class Value; class Pass; @@ -94,6 +95,12 @@ void MergeBasicBlockIntoOnlyPred(BasicBlock *BB); /// bool SimplifyCFG(BasicBlock *BB); +/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch, +/// and if a predecessor branches to us and one of our successors, fold the +/// setcc into the predecessor and use logical operations to pick the right +/// destination. +bool FoldBranchToCommonDest(BranchInst *BI); + /// DemoteRegToStack - This function takes a virtual register computed by an /// Instruction and replaces it with a slot in the stack frame, allocated via /// alloca. This allows the CFG to be changed around without fear of diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 03d273d25d7..d03591081fb 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -42,6 +42,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" #include "llvm/ADT/SetOperations.h" @@ -258,6 +259,80 @@ ReprocessLoop: PN->eraseFromParent(); } + // If this loop has muliple exits and the exits all go to the same + // block, attempt to merge the exits. This helps several passes, such + // as LoopRotation, which do not support loops with multiple exits. + // SimplifyCFG also does this (and this code uses the same utility + // function), however this code is loop-aware, where SimplifyCFG is + // not. That gives it the advantage of being able to hoist + // loop-invariant instructions out of the way to open up more + // opportunities, and the disadvantage of having the responsibility + // to preserve dominator information. + if (ExitBlocks.size() > 1 && L->getUniqueExitBlock()) { + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BasicBlock *ExitingBlock = ExitingBlocks[i]; + if (!ExitingBlock->getSinglePredecessor()) continue; + BranchInst *BI = dyn_cast(ExitingBlock->getTerminator()); + if (!BI || !BI->isConditional()) continue; + CmpInst *CI = dyn_cast(BI->getCondition()); + if (!CI || CI->getParent() != ExitingBlock) continue; + + // Attempt to hoist out all instructions except for the + // comparison and the branch. + bool AllInvariant = true; + for (BasicBlock::iterator I = ExitingBlock->begin(), + E = ExitingBlock->end(); I != E; ) { + Instruction *Inst = I++; + if (Inst == BI || Inst == CI) + continue; + if (Inst->isTrapping()) { + AllInvariant = false; + break; + } + for (unsigned j = 0, f = Inst->getNumOperands(); j != f; ++j) + if (!L->isLoopInvariant(Inst->getOperand(j))) { + AllInvariant = false; + break; + } + if (!AllInvariant) + break; + // Hoist. + Inst->moveBefore(L->getLoopPreheader()->getTerminator()); + } + if (!AllInvariant) continue; + + // The block has now been cleared of all instructions except for + // a comparison and a conditional branch. SimplifyCFG may be able + // to fold it now. + if (!FoldBranchToCommonDest(BI)) continue; + + // Success. The block is now dead, so remove it from the loop, + // update the dominator tree and dominance frontier, and delete it. + assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); + Changed = true; + L->removeBlockFromLoop(ExitingBlock); + + DominanceFrontier *DF = getAnalysisIfAvailable(); + DomTreeNode *Node = DT->getNode(ExitingBlock); + const std::vector *> &Children = + Node->getChildren(); + for (unsigned k = 0, g = Children.size(); k != g; ++k) { + DT->changeImmediateDominator(Children[k], Node->getIDom()); + if (DF) DF->changeImmediateDominator(Children[k]->getBlock(), + Node->getIDom()->getBlock(), + DT); + } + DT->eraseNode(ExitingBlock); + if (DF) DF->removeBlock(ExitingBlock); + + BI->getSuccessor(0)->removePredecessor(ExitingBlock); + BI->getSuccessor(1)->removePredecessor(ExitingBlock); + ExitingBlock->eraseFromParent(); + } + } + return Changed; } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index ee0f6a65de4..58d4d5a344c 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1492,7 +1492,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { /// and if a predecessor branches to us and one of our successors, fold the /// setcc into the predecessor and use logical operations to pick the right /// destination. -static bool FoldBranchToCommonDest(BranchInst *BI) { +bool llvm::FoldBranchToCommonDest(BranchInst *BI) { BasicBlock *BB = BI->getParent(); Instruction *Cond = dyn_cast(BI->getCondition()); if (Cond == 0) return false; diff --git a/test/Transforms/LoopSimplify/merge-exits.ll b/test/Transforms/LoopSimplify/merge-exits.ll new file mode 100644 index 00000000000..c5bf7fdc3c4 --- /dev/null +++ b/test/Transforms/LoopSimplify/merge-exits.ll @@ -0,0 +1,45 @@ +; RUN: llvm-as < %s | opt -loopsimplify -loop-rotate -instcombine -indvars \ +; RUN: | llvm-dis > %t +; RUN: not grep sext %t +; RUN: grep {phi i64} %t | count 1 + +; Loopsimplify should be able to merge the two loop exits +; into one, so that loop rotate can rotate the loop, so +; that indvars can promote the induction variable to i64 +; without needing casts. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + +define float @t(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind { +entry: + %t0 = load float* %peakWeight, align 4 ; [#uses=1] + br label %bb1 + +bb: ; preds = %bb2 + %t1 = sext i32 %hiPart.0 to i64 ; [#uses=1] + %t2 = getelementptr float* %pTmp1, i64 %t1 ; [#uses=1] + %t3 = load float* %t2, align 4 ; [#uses=1] + %t4 = fadd float %t3, %distERBhi.0 ; [#uses=1] + %t5 = add i32 %hiPart.0, 1 ; [#uses=2] + %t6 = sext i32 %t5 to i64 ; [#uses=1] + %t7 = getelementptr float* %peakWeight, i64 %t6 ; [#uses=1] + %t8 = load float* %t7, align 4 ; [#uses=1] + %t9 = fadd float %t8, %peakCount.0 ; [#uses=1] + br label %bb1 + +bb1: ; preds = %bb, %entry + %peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ] ; [#uses=2] + %hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ] ; [#uses=3] + %distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ] ; [#uses=3] + %t10 = fcmp uge float %distERBhi.0, 2.500000e+00 ; [#uses=1] + br i1 %t10, label %bb3, label %bb2 + +bb2: ; preds = %bb1 + %t11 = add i32 %bandEdgeIndex, -1 ; [#uses=1] + %t12 = icmp sgt i32 %t11, %hiPart.0 ; [#uses=1] + br i1 %t12, label %bb, label %bb3 + +bb3: ; preds = %bb2, %bb1 + %t13 = fdiv float %peakCount.0, %distERBhi.0 ; [#uses=1] + ret float %t13 +}