diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 6226237c6d4..511a55821d5 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -215,6 +215,8 @@ class MachineBlockPlacement : public MachineFunctionPass { MachineLoop &L, const BlockFilterSet &LoopBlockSet); void buildLoopChains(MachineFunction &F, MachineLoop &L); + void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB, + const BlockFilterSet &LoopBlockSet); void buildCFGChains(MachineFunction &F); public: @@ -659,6 +661,54 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, return ExitingBB; } +/// \brief Attempt to rotate an exiting block to the bottom of the loop. +/// +/// Once we have built a chain, try to rotate it to line up the hot exit block +/// with fallthrough out of the loop if doing so doesn't introduce unnecessary +/// branches. For example, if the loop has fallthrough into its header and out +/// of its bottom already, don't rotate it. +void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, + MachineBasicBlock *ExitingBB, + const BlockFilterSet &LoopBlockSet) { + if (!ExitingBB) + return; + + MachineBasicBlock *Top = *LoopChain.begin(); + bool ViableTopFallthrough = false; + for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(), + PE = Top->pred_end(); + PI != PE; ++PI) { + BlockChain *PredChain = BlockToChain[*PI]; + if (!LoopBlockSet.count(*PI) && + (!PredChain || *PI == *llvm::prior(PredChain->end()))) { + ViableTopFallthrough = true; + break; + } + } + + // If the header has viable fallthrough, check whether the current loop + // bottom is a viable exiting block. If so, bail out as rotating will + // introduce an unnecessary branch. + if (ViableTopFallthrough) { + MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end()); + for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), + SE = Bottom->succ_end(); + SI != SE; ++SI) { + BlockChain *SuccChain = BlockToChain[*SI]; + if (!LoopBlockSet.count(*SI) && + (!SuccChain || *SI == *SuccChain->begin())) + return; + } + } + + BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(), + ExitingBB); + if (ExitIt == LoopChain.end()) + return; + + std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end()); +} + /// \brief Forms basic block chains from the natural loop structures. /// /// These chains are designed to preserve the existing *structure* of the code @@ -709,17 +759,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, } buildChain(L.getHeader(), LoopChain, BlockWorkList, &LoopBlockSet); - - // Once we have built a chain, try to rotate it to line up the hot exit block - // with fallthrough out of the loop (if we have a valid exit block for that). - if (ExitingBB) { - BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(), - ExitingBB); - - if (ExitIt != LoopChain.end()) { - std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end()); - } - } + rotateLoop(LoopChain, ExitingBB, LoopBlockSet); DEBUG({ // Crash at the end so we get all of the debugging output first. diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll index f3c97276051..9359fb8c12c 100644 --- a/test/CodeGen/X86/block-placement.ll +++ b/test/CodeGen/X86/block-placement.ll @@ -122,14 +122,14 @@ define i32 @test_loop_early_exits(i32 %i, i32* %a) { ; Check that we sink early exit blocks out of loop bodies. ; CHECK: test_loop_early_exits: ; CHECK: %entry +; CHECK: %body1 ; CHECK: %body2 ; CHECK: %body3 ; CHECK: %body4 -; CHECK: %body1 +; CHECK: %exit ; CHECK: %bail1 ; CHECK: %bail2 ; CHECK: %bail3 -; CHECK: %exit entry: br label %body1 @@ -199,6 +199,36 @@ exit: ret i32 %base } +define i32 @test_no_loop_rotate(i32 %i, i32* %a) { +; Check that we don't try to rotate a loop which is already laid out with +; fallthrough opportunities into the top and out of the bottom. +; CHECK: test_no_loop_rotate: +; CHECK: %entry +; CHECK: %body0 +; CHECK: %body1 +; CHECK: %exit + +entry: + br label %body0 + +body0: + %iv = phi i32 [ 0, %entry ], [ %next, %body1 ] + %base = phi i32 [ 0, %entry ], [ %sum, %body1 ] + %arrayidx = getelementptr inbounds i32* %a, i32 %iv + %0 = load i32* %arrayidx + %sum = add nsw i32 %0, %base + %bailcond1 = icmp eq i32 %sum, 42 + br i1 %bailcond1, label %exit, label %body1 + +body1: + %next = add i32 %iv, 1 + %exitcond = icmp eq i32 %next, %i + br i1 %exitcond, label %exit, label %body0 + +exit: + ret i32 %base +} + define void @test_loop_rotate_reversed_blocks() { ; This test case (greatly reduced from an Olden bencmark) ensures that the loop ; rotate implementation doesn't assume that loops are laid out in a particular