diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 93270772f03..c4dca2cd151 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -1011,29 +1011,63 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Walk through the backedges of the function now that we have fully laid out // the basic blocks and align the destination of each backedge. We don't rely - // on the loop info here so that we can align backedges in unnatural CFGs and - // backedges that were introduced purely because of the loop rotations done - // during this layout pass. - // FIXME: This isn't quite right, we shouldn't align backedges that result - // from blocks being sunken below the exit block for the function. + // exclusively on the loop info here so that we can align backedges in + // unnatural CFGs and backedges that were introduced purely because of the + // loop rotations done during this layout pass. if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) return; unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) return; // Don't care about loop alignment. + if (FunctionChain.begin() == FunctionChain.end()) + return; // Empty chain. - SmallPtrSet PreviousBlocks; - for (BlockChain::iterator BI = FunctionChain.begin(), + const BranchProbability ColdProb(1, 5); // 20% + BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); + BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; + for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()), BE = FunctionChain.end(); BI != BE; ++BI) { - PreviousBlocks.insert(*BI); - // Set alignment on the destination of all the back edges in the new - // ordering. - for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(), - SE = (*BI)->succ_end(); - SI != SE; ++SI) - if (PreviousBlocks.count(*SI)) - (*SI)->setAlignment(Align); + // Don't align non-looping basic blocks. These are unlikely to execute + // enough times to matter in practice. Note that we'll still handle + // unnatural CFGs inside of a natural outer loop (the common case) and + // rotated loops. + MachineLoop *L = MLI->getLoopFor(*BI); + if (!L) + continue; + + // If the block is cold relative to the function entry don't waste space + // aligning it. + BlockFrequency Freq = MBFI->getBlockFreq(*BI); + if (Freq < WeightedEntryFreq) + continue; + + // If the block is cold relative to its loop header, don't align it + // regardless of what edges into the block exist. + MachineBasicBlock *LoopHeader = L->getHeader(); + BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader); + if (Freq < (LoopHeaderFreq * ColdProb)) + continue; + + // Check for the existence of a non-layout predecessor which would benefit + // from aligning this block. + MachineBasicBlock *LayoutPred = *llvm::prior(BI); + + // Force alignment if all the predecessors are jumps. We already checked + // that the block isn't cold above. + if (!LayoutPred->isSuccessor(*BI)) { + (*BI)->setAlignment(Align); + continue; + } + + // Align this block if the layout predecessor's edge into this block is + // cold relative to the block. When this is true, othe predecessors make up + // all of the hot entries into the block and thus alignment is likely to be + // important. + BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); + BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; + if (LayoutEdgeFreq <= (Freq * ColdProb)) + (*BI)->setAlignment(Align); } } diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll index 66f06778bde..b2cf34cd203 100644 --- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll +++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll @@ -17,8 +17,7 @@ bb: ; preds = %bb, %entry ; CHECK: %bb30.loopexit ; CHECK: divsd %xmm0 ; CHECK: movsd %xmm0, 16(%esp) -; CHECK: .align -; CHECK-NEXT: %bb3 +; CHECK: %bb3 bb3: ; preds = %bb30.loopexit, %bb25, %bb3 %2 = load i32* null, align 4 ; [#uses=1] %3 = mul i32 %2, 0 ; [#uses=1] diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll index ab23b88f49e..5534712af83 100644 --- a/test/CodeGen/X86/block-placement.ll +++ b/test/CodeGen/X86/block-placement.ll @@ -7,10 +7,15 @@ define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) { ; that is not expected to run. ; CHECK: test_ifchains: ; CHECK: %entry +; CHECK-NOT: .align ; CHECK: %else1 +; CHECK-NOT: .align ; CHECK: %else2 +; CHECK-NOT: .align ; CHECK: %else3 +; CHECK-NOT: .align ; CHECK: %else4 +; CHECK-NOT: .align ; CHECK: %exit ; CHECK: %then1 ; CHECK: %then2 @@ -76,8 +81,11 @@ define i32 @test_loop_cold_blocks(i32 %i, i32* %a) { ; Check that we sink cold loop blocks after the hot loop body. ; CHECK: test_loop_cold_blocks: ; CHECK: %entry +; CHECK-NOT: .align ; CHECK: %unlikely1 +; CHECK-NOT: .align ; CHECK: %unlikely2 +; CHECK: .align ; CHECK: %body1 ; CHECK: %body2 ; CHECK: %body3 diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll index d14102fe245..4bd162b4529 100644 --- a/test/CodeGen/X86/loop-blocks.ll +++ b/test/CodeGen/X86/loop-blocks.ll @@ -41,7 +41,6 @@ done: ; CHECK-NEXT: align ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: callq bar99 -; CHECK-NEXT: align ; CHECK-NEXT: .LBB1_1: ; CHECK-NEXT: callq body @@ -79,7 +78,6 @@ exit: ; CHECK-NEXT: .LBB2_5: ; CHECK-NEXT: callq block_a_true_func ; CHECK-NEXT: callq block_a_merge_func -; CHECK-NEXT: align ; CHECK-NEXT: .LBB2_1: ; CHECK-NEXT: callq body ; @@ -139,13 +137,13 @@ exit: ; CHECK-NEXT: align ; CHECK-NEXT: .LBB3_7: ; CHECK-NEXT: callq bar100 -; CHECK-NEXT: align ; CHECK-NEXT: .LBB3_1: ; CHECK-NEXT: callq loop_header ; CHECK: jl .LBB3_7 ; CHECK: jge .LBB3_3 ; CHECK-NEXT: callq bar101 ; CHECK-NEXT: jmp .LBB3_1 +; CHECK-NEXT: align ; CHECK-NEXT: .LBB3_3: ; CHECK: jge .LBB3_4 ; CHECK-NEXT: callq bar102 diff --git a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll index a9b815fc459..c3b8b8910d5 100644 --- a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll +++ b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll @@ -99,7 +99,6 @@ while.end: ; preds = %entry ; CHECK: %for.body3.lr.ph.us.i.loopexit ; CHECK-NEXT: in Loop: Header ; CHECK-NEXT: incq -; CHECK-NEXT: .align ; CHECK-NEXT: %for.body3.us.i ; CHECK-NEXT: Inner Loop ; CHECK: testb