During CodeGenPrepare we often lower intrinsics (such as objsize)
and allow some optimizations to turn conditional branches into unconditional ones.
This commit adds a simple control-flow optimization which merges two consecutive
basic blocks which are connected by a single edge. This allows the codegen to
operate on larger basic blocks.

rdar://11973998



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161852 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem 2012-08-14 05:19:07 +00:00
parent 443c9ed768
commit 3e883734fa
8 changed files with 62 additions and 22 deletions

View File

@ -116,6 +116,7 @@ namespace {
} }
private: private:
bool EliminateFallThrough(Function &F);
bool EliminateMostlyEmptyBlocks(Function &F); bool EliminateMostlyEmptyBlocks(Function &F);
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void EliminateMostlyEmptyBlock(BasicBlock *BB); void EliminateMostlyEmptyBlock(BasicBlock *BB);
@ -192,6 +193,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
I = WorkList.begin(), E = WorkList.end(); I != E; ++I) I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
DeleteDeadBlock(*I); DeleteDeadBlock(*I);
// Merge pairs of basic blocks with unconditional branches, connected by
// a single edge.
if (EverMadeChange || MadeChange)
MadeChange |= EliminateFallThrough(F);
if (MadeChange) if (MadeChange)
ModifiedDT = true; ModifiedDT = true;
EverMadeChange |= MadeChange; EverMadeChange |= MadeChange;
@ -203,6 +209,39 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
return EverMadeChange; return EverMadeChange;
} }
/// EliminateFallThrough - Merge basic blocks which are connected
/// by a single edge, where one of the basic blocks has a single successor
/// pointing to the other basic block, which has a single predecessor.
///
/// Every block except the entry block is visited. A block is merged with its
/// predecessor when it has exactly one predecessor (other than itself) and
/// that predecessor ends in an unconditional branch.
///
/// \returns true if at least one pair of blocks was merged.
bool CodeGenPrepare::EliminateFallThrough(Function &F) {
  bool Changed = false;
  // Scan all of the blocks in the function, except for the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    // Advance the iterator up front; it is reset below if a merge happens.
    BasicBlock *BB = I++;
    // If the destination block has a single pred, then this is a trivial
    // edge, just collapse it. A block whose only predecessor is itself is
    // left alone.
    BasicBlock *SinglePred = BB->getSinglePredecessor();
    if (!SinglePred || SinglePred == BB) continue;

    BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
    if (Term && !Term->isConditional()) {
      Changed = true;
      // Dump the predecessor *before* merging: the merge below erases
      // SinglePred, so it must not be dereferenced afterwards.
      DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
      // Remember if SinglePred was the entry block of the function.
      // If so, we will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(BB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      // We have erased a block. Update the iterator so the scan resumes at
      // BB, which is then re-examined against its new predecessors.
      I = BB;
    }
  }
  return Changed;
}
/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel /// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for

View File

@ -3,16 +3,17 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
target triple = "thumbv7-apple-macosx10.6.7" target triple = "thumbv7-apple-macosx10.6.7"
;CHECK: vadd.f32 q4, q8, q8 ;CHECK: vadd.f32 q4, q8, q8
;CHECK-NEXT: Ltmp ;CHECK-NEXT: Ltmp1
;CHECK-NEXT: @DEBUG_VALUE: y <- Q4+0
;CHECK-NEXT: @DEBUG_VALUE: x <- Q4+0 ;CHECK:@DEBUG_VALUE: x <- Q4+0
;CHECK-NEXT:@DEBUG_VALUE: y <- Q4+0
@.str = external constant [13 x i8] @.str = external constant [13 x i8]
declare <4 x float> @test0001(float) nounwind readnone ssp declare <4 x float> @test0001(float) nounwind readnone ssp
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { define i32 @main(i32 %argc, i8** nocapture %argv, i1 %cond) nounwind ssp {
entry: entry:
br label %for.body9 br label %for.body9
@ -21,7 +22,7 @@ for.body9: ; preds = %for.body9, %entry
tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39 tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39
%add20 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39 %add20 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39
tail call void @llvm.dbg.value(metadata !{<4 x float> %add20}, i64 0, metadata !28), !dbg !39 tail call void @llvm.dbg.value(metadata !{<4 x float> %add20}, i64 0, metadata !28), !dbg !39
br i1 undef, label %for.end54, label %for.body9, !dbg !44 br i1 %cond, label %for.end54, label %for.body9, !dbg !44
for.end54: ; preds = %for.body9 for.end54: ; preds = %for.body9
%tmp115 = extractelement <4 x float> %add19, i32 1 %tmp115 = extractelement <4 x float> %add19, i32 1
@ -52,7 +53,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
!8 = metadata !{metadata !9} !8 = metadata !{metadata !9}
!9 = metadata !{i32 589857, i64 0, i64 3} ; [ DW_TAG_subrange_type ] !9 = metadata !{i32 589857, i64 0, i64 3} ; [ DW_TAG_subrange_type ]
!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ] !10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null} ; [ DW_TAG_subprogram ]
!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!12 = metadata !{metadata !13} !12 = metadata !{metadata !13}
!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]

View File

@ -2,7 +2,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "powerpc64-unknown-linux-gnu" target triple = "powerpc64-unknown-linux-gnu"
; RUN: llc < %s | FileCheck %s ; RUN: llc < %s | FileCheck %s
define fastcc void @allocateSpace() nounwind { define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
entry: entry:
%0 = load i8** undef, align 8, !tbaa !0 %0 = load i8** undef, align 8, !tbaa !0
br i1 undef, label %return, label %lor.lhs.false br i1 undef, label %return, label %lor.lhs.false
@ -20,10 +20,10 @@ while.cond: ; preds = %while.body, %if.the
%idxprom17 = sext i32 0 to i64 %idxprom17 = sext i32 0 to i64
%arrayidx18 = getelementptr inbounds i8* %0, i64 %idxprom17 %arrayidx18 = getelementptr inbounds i8* %0, i64 %idxprom17
%or = or i32 undef, undef %or = or i32 undef, undef
br i1 false, label %if.end71, label %while.body br i1 %cond1, label %if.end71, label %while.body
while.body: ; preds = %while.cond while.body: ; preds = %while.cond
br i1 undef, label %while.cond, label %if.then45 br i1 %cond2, label %while.cond, label %if.then45
if.then45: ; preds = %while.body if.then45: ; preds = %while.body
%idxprom48139 = zext i32 %or to i64 %idxprom48139 = zext i32 %or to i64

View File

@ -52,8 +52,8 @@ entry:
%tmp21 = load double* %tmp20 ; <double> [#uses=1] %tmp21 = load double* %tmp20 ; <double> [#uses=1]
%tmp.upgrd.6 = getelementptr [9 x i8]* @str, i32 0, i64 0 ; <i8*> [#uses=1] %tmp.upgrd.6 = getelementptr [9 x i8]* @str, i32 0, i64 0 ; <i8*> [#uses=1]
%tmp.upgrd.7 = call i32 (i8*, ...)* @printf( i8* %tmp.upgrd.6, double %tmp21, double %tmp19 ) ; <i32> [#uses=0] %tmp.upgrd.7 = call i32 (i8*, ...)* @printf( i8* %tmp.upgrd.6, double %tmp21, double %tmp19 ) ; <i32> [#uses=0]
br label %return br label %finish
return: ; preds = %entry finish:
%retval.upgrd.8 = load i32* %retval ; <i32> [#uses=1] %retval.upgrd.8 = load i32* %retval ; <i32> [#uses=1]
ret i32 %retval.upgrd.8 ret i32 %retval.upgrd.8
} }

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86-64 > %t ; RUN: llc < %s -march=x86-64 > %t
; RUN: grep movb %t | count 2 ; RUN: grep movb %t | count 1
; RUN: grep "movzb[wl]" %t ; RUN: grep "movzb[wl]" %t

View File

@ -10,10 +10,10 @@
%struct.indexentry = type { i32, i8*, i8*, i8*, i8*, i8* } %struct.indexentry = type { i32, i8*, i8*, i8*, i8*, i8* }
define i32 @_bfd_stab_section_find_nearest_line(i32 %offset) nounwind { define i32 @_bfd_stab_section_find_nearest_line(i32 %offset, i1 %cond) nounwind {
entry: entry:
%tmp910 = add i32 0, %offset ; <i32> [#uses=1] %tmp910 = add i32 0, %offset ; <i32> [#uses=1]
br i1 true, label %bb951, label %bb917 br i1 %cond, label %bb951, label %bb917
bb917: ; preds = %entry bb917: ; preds = %entry
ret i32 0 ret i32 0
@ -21,7 +21,7 @@ bb917: ; preds = %entry
bb951: ; preds = %bb986, %entry bb951: ; preds = %bb986, %entry
%tmp955 = sdiv i32 0, 2 ; <i32> [#uses=3] %tmp955 = sdiv i32 0, 2 ; <i32> [#uses=3]
%tmp961 = getelementptr %struct.indexentry* null, i32 %tmp955, i32 0 ; <i32*> [#uses=1] %tmp961 = getelementptr %struct.indexentry* null, i32 %tmp955, i32 0 ; <i32*> [#uses=1]
br i1 true, label %bb986, label %bb967 br i1 %cond, label %bb986, label %bb967
bb967: ; preds = %bb951 bb967: ; preds = %bb951
ret i32 0 ret i32 0

View File

@ -5,7 +5,7 @@ target triple = "x86_64-apple-darwin10.0.0"
; CHECK: @test1 ; CHECK: @test1
; objectsize should fold to a constant, which causes the branch to fold to an ; objectsize should fold to a constant, which causes the branch to fold to an
; uncond branch. ; uncond branch. Next, we fold the control flow altogether.
; rdar://8785296 ; rdar://8785296
define i32 @test1(i8* %ptr) nounwind ssp noredzone align 2 { define i32 @test1(i8* %ptr) nounwind ssp noredzone align 2 {
entry: entry:
@ -13,8 +13,8 @@ entry:
%1 = icmp ugt i64 %0, 3 %1 = icmp ugt i64 %0, 3
br i1 %1, label %T, label %trap br i1 %1, label %T, label %trap
; CHECK: entry: ; CHECK: T:
; CHECK-NEXT: br label %T ; CHECK-NOT: br label %
trap: ; preds = %0, %entry trap: ; preds = %0, %entry
tail call void @llvm.trap() noreturn nounwind tail call void @llvm.trap() noreturn nounwind

View File

@ -44,7 +44,7 @@ declare %s* @getstruct() nounwind
; CHECK: @main ; CHECK: @main
; Check that the loop preheader contains no address computation. ; Check that the loop preheader contains no address computation.
; CHECK: %entry ; CHECK: %end_of_chain
; CHECK-NOT: add{{.*}}lsl ; CHECK-NOT: add{{.*}}lsl
; CHECK: ldr{{.*}}lsl #2 ; CHECK: ldr{{.*}}lsl #2
; CHECK: ldr{{.*}}lsl #2 ; CHECK: ldr{{.*}}lsl #2
@ -65,15 +65,15 @@ while.cond:
while.body: while.body:
%v3 = load i32* @ncol, align 4, !tbaa !0 %v3 = load i32* @ncol, align 4, !tbaa !0
br label %while.cond.i br label %end_of_chain
while.cond.i: end_of_chain:
%state.i = getelementptr inbounds %s* %call18, i32 0, i32 0 %state.i = getelementptr inbounds %s* %call18, i32 0, i32 0
%v4 = load i32** %state.i, align 4, !tbaa !3 %v4 = load i32** %state.i, align 4, !tbaa !3
br label %while.cond.i.i br label %while.cond.i.i
while.cond.i.i: while.cond.i.i:
%counter.0.i.i = phi i32 [ %v3, %while.cond.i ], [ %dec.i.i, %land.rhs.i.i ] %counter.0.i.i = phi i32 [ %v3, %end_of_chain ], [ %dec.i.i, %land.rhs.i.i ]
%dec.i.i = add nsw i32 %counter.0.i.i, -1 %dec.i.i = add nsw i32 %counter.0.i.i, -1
%tobool.i.i = icmp eq i32 %counter.0.i.i, 0 %tobool.i.i = icmp eq i32 %counter.0.i.i, 0
br i1 %tobool.i.i, label %where.exit, label %land.rhs.i.i br i1 %tobool.i.i, label %where.exit, label %land.rhs.i.i