diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 2e1d12d2346..61a8b12860a 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -62,6 +62,8 @@ namespace { private: bool OptimizeIntraLoopEdges(); + bool HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L, + SmallPtrSet &DoNotAlign); bool AlignLoops(MachineFunction &MF); }; @@ -244,14 +246,37 @@ bool CodePlacementOpt::OptimizeIntraLoopEdges() { /// should be aligned. For now, we will not align it if all the predcessors /// (i.e. loop back edges) are laid out above the header. FIXME: Do not /// align small loops. -static bool HeaderShouldBeAligned(MachineBasicBlock *MBB) { +bool +CodePlacementOpt::HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L, + SmallPtrSet &DoNotAlign) { + if (DoNotAlign.count(MBB)) + return false; + + bool BackEdgeBelow = false; for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock *PredMBB = *PI; - if (PredMBB == MBB || PredMBB->getNumber() > MBB->getNumber()) - return true; + if (PredMBB == MBB || PredMBB->getNumber() > MBB->getNumber()) { + BackEdgeBelow = true; + break; + } } - return false; + + if (!BackEdgeBelow) + return false; + + // Ok, we are going to align this loop header. If it's an inner loop, + // do not align its outer loop. + MachineBasicBlock *PreHeader = L->getLoopPreheader(); + if (PreHeader) { + MachineLoop *L = MLI->getLoopFor(PreHeader); + if (L) { + MachineBasicBlock *HeaderBlock = L->getHeader(); + HeaderBlock->setAlignment(0); + DoNotAlign.insert(HeaderBlock); + } + } + return true; } /// AlignLoops - Align loop headers to target preferred alignments. @@ -269,14 +294,16 @@ bool CodePlacementOpt::AlignLoops(MachineFunction &MF) { MF.RenumberBlocks(); bool Changed = false; + SmallPtrSet DoNotAlign; for (unsigned i = 0, e = LoopHeaders.size(); i != e; ++i) { MachineBasicBlock *HeaderMBB = LoopHeaders[i]; MachineBasicBlock *PredMBB = prior(MachineFunction::iterator(HeaderMBB)); - if (MLI->getLoopFor(HeaderMBB) == MLI->getLoopFor(PredMBB)) + MachineLoop *L = MLI->getLoopFor(HeaderMBB); + if (L == MLI->getLoopFor(PredMBB)) // If previously BB is in the same loop, don't align this BB. We want // to prevent adding noop's inside a loop. continue; - if (HeaderShouldBeAligned(HeaderMBB)) { + if (HeaderShouldBeAligned(HeaderMBB, L, DoNotAlign)) { HeaderMBB->setAlignment(Align); Changed = true; ++NumHeaderAligned; diff --git a/test/CodeGen/X86/avoid-loop-align-2.ll b/test/CodeGen/X86/avoid-loop-align-2.ll new file mode 100644 index 00000000000..9f0aeb32c41 --- /dev/null +++ b/test/CodeGen/X86/avoid-loop-align-2.ll @@ -0,0 +1,45 @@ +; RUN: llvm-as < %s | llc -march=x86 | grep align | count 3 + +@x = external global i32* ; [#uses=1] + +define i32 @t(i32 %a, i32 %b) nounwind readonly ssp { +entry: + %0 = icmp eq i32 %a, 0 ; [#uses=1] + br i1 %0, label %bb5, label %bb.nph12 + +bb.nph12: ; preds = %entry + %1 = icmp eq i32 %b, 0 ; [#uses=1] + %2 = load i32** @x, align 8 ; [#uses=1] + br i1 %1, label %bb2.preheader, label %bb2.preheader.us + +bb2.preheader.us: ; preds = %bb2.bb3_crit_edge.us, %bb.nph12 + %indvar18 = phi i32 [ 0, %bb.nph12 ], [ %indvar.next19, %bb2.bb3_crit_edge.us ] ; [#uses=2] + %sum.111.us = phi i32 [ 0, %bb.nph12 ], [ %4, %bb2.bb3_crit_edge.us ] ; [#uses=0] + %tmp16 = mul i32 %indvar18, %a ; [#uses=1] + br label %bb1.us + +bb1.us: ; preds = %bb1.us, %bb2.preheader.us + %indvar = phi i32 [ 0, %bb2.preheader.us ], [ %indvar.next, %bb1.us ] ; [#uses=2] + %tmp17 = add i32 %indvar, %tmp16 ; [#uses=1] + %tmp. = zext i32 %tmp17 to i64 ; [#uses=1] + %3 = getelementptr i32* %2, i64 %tmp. ; [#uses=1] + %4 = load i32* %3, align 4 ; [#uses=2] + %indvar.next = add i32 %indvar, 1 ; [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %b ; [#uses=1] + br i1 %exitcond, label %bb2.bb3_crit_edge.us, label %bb1.us + +bb2.bb3_crit_edge.us: ; preds = %bb1.us + %indvar.next19 = add i32 %indvar18, 1 ; [#uses=2] + %exitcond22 = icmp eq i32 %indvar.next19, %a ; [#uses=1] + br i1 %exitcond22, label %bb5, label %bb2.preheader.us + +bb2.preheader: ; preds = %bb2.preheader, %bb.nph12 + %indvar24 = phi i32 [ %indvar.next25, %bb2.preheader ], [ 0, %bb.nph12 ] ; [#uses=1] + %indvar.next25 = add i32 %indvar24, 1 ; [#uses=2] + %exitcond28 = icmp eq i32 %indvar.next25, %a ; [#uses=1] + br i1 %exitcond28, label %bb5, label %bb2.preheader + +bb5: ; preds = %bb2.preheader, %bb2.bb3_crit_edge.us, %entry + %sum.1.lcssa = phi i32 [ 0, %entry ], [ 0, %bb2.preheader ], [ %4, %bb2.bb3_crit_edge.us ] ; [#uses=1] + ret i32 %sum.1.lcssa +}