mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-11-04 05:17:07 +00:00 
			
		
		
		
	to break up CFG diamonds by banishing one of the blocks to the end of the function, which is bad for code density and branch size. This does pessimize MultiSource/Benchmarks/Ptrdist/yacr2, the benchmark cited as the reason for the change, however I've examined the code and it looks more like a case of gaming a particular branch than of being generally applicable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@84803 91177308-0d34-0410-b5e6-96231b3b80d8
		
			
				
	
	
		
			208 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			LLVM
		
	
	
	
	
	
			
		
		
	
	
			208 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			LLVM
		
	
	
	
	
	
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false | FileCheck %s

; These tests check for loop branching structure, and that the loop align
; directive is placed in the expected place.

; CodeGen should insert a branch into the middle of the loop in
; order to avoid a branch within the loop.
;
; Expected layout, per the directives below: the entry code jumps to the
; header label (.LBB1_1), the align directive follows that jump, and the latch
; block (.LBB1_2) is emitted directly before the header so that it falls
; through into it.

; CHECK: simple:
;      CHECK:   jmp   .LBB1_1
; CHECK-NEXT:   align
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT:   call loop_latch
; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT:   call loop_header

define void @simple() nounwind {
entry:
  br label %loop

; Loop header: calls @loop_header, then leaves the loop for %done when @get
; returns a negative value; otherwise continues to the latch in %bb.
loop:
  call void @loop_header()
  %t0 = tail call i32 @get()
  %t1 = icmp slt i32 %t0, 0
  br i1 %t1, label %done, label %bb

; Loop latch: calls @loop_latch and branches back to the header.
bb:
  call void @loop_latch()
  br label %loop

; Loop exit.
done:
  call void @exit()
  ret void
}

; CodeGen should move block_a to the top of the loop so that it
; falls through into the loop, avoiding a branch within the loop.
;
; Expected layout, per the directives below: the entry code jumps to the
; header label (.LBB2_1), the align directive follows that jump, and the block
; calling bar99 (.LBB2_4) is emitted directly before the header.

; CHECK: slightly_more_involved:
;      CHECK:   jmp .LBB2_1
; CHECK-NEXT:   align
; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT:   call bar99
; CHECK-NEXT: .LBB2_1:
; CHECK-NEXT:   call body

define void @slightly_more_involved() nounwind {
entry:
  br label %loop

; Loop header: calls @body, then goes to %block_a when @get returns a value
; below 2, otherwise to %bb.
loop:
  call void @body()
  %t0 = call i32 @get()
  %t1 = icmp slt i32 %t0, 2
  br i1 %t1, label %block_a, label %bb

; Second test inside the loop: leaves for %exit when @get returns a value
; below 99, otherwise branches back to the header.
bb:
  %t2 = call i32 @get()
  %t3 = icmp slt i32 %t2, 99
  br i1 %t3, label %exit, label %loop

; Side block that calls @bar99 and branches back to the header.
block_a:
  call void @bar99()
  br label %loop

; Loop exit.
exit:
  call void @exit()
  ret void
}

; Same as slightly_more_involved, but block_a is now a CFG diamond with
; fallthrough edges which should be preserved.
;
; Expected layout, per the directives below: the entry code jumps to the
; header label (.LBB3_1), the align directive follows that jump, and the whole
; diamond (.LBB3_3, the true arm, .LBB3_5, .LBB3_6) is emitted contiguously
; directly before the header.

; CHECK: yet_more_involved:
;      CHECK:   jmp .LBB3_1
; CHECK-NEXT:   align
; CHECK-NEXT: .LBB3_3:
; CHECK-NEXT:   call bar99
; CHECK-NEXT:   call get
; CHECK-NEXT:   cmpl $2999, %eax
; CHECK-NEXT:   jg .LBB3_5
; CHECK-NEXT:   call block_a_true_func
; CHECK-NEXT:   jmp .LBB3_6
; CHECK-NEXT: .LBB3_5:
; CHECK-NEXT:   call block_a_false_func
; CHECK-NEXT: .LBB3_6:
; CHECK-NEXT:   call block_a_merge_func
; CHECK-NEXT: .LBB3_1:
; CHECK-NEXT:   call body

define void @yet_more_involved() nounwind {
entry:
  br label %loop

; Loop header: calls @body, then goes to %block_a when @get returns a value
; below 2, otherwise to %bb.
loop:
  call void @body()
  %t0 = call i32 @get()
  %t1 = icmp slt i32 %t0, 2
  br i1 %t1, label %block_a, label %bb

; Second test inside the loop: leaves for %exit when @get returns a value
; below 99, otherwise branches back to the header.
bb:
  %t2 = call i32 @get()
  %t3 = icmp slt i32 %t2, 99
  br i1 %t3, label %exit, label %loop

; Top of the diamond: calls @bar99, then picks the true arm when @get returns
; a value below 3000 and the false arm otherwise.
block_a:
  call void @bar99()
  %z0 = call i32 @get()
  %z1 = icmp slt i32 %z0, 3000
  br i1 %z1, label %block_a_true, label %block_a_false

; True arm of the diamond.
block_a_true:
  call void @block_a_true_func()
  br label %block_a_merge

; False arm of the diamond.
block_a_false:
  call void @block_a_false_func()
  br label %block_a_merge

; Bottom of the diamond: both arms rejoin here before branching back to the
; header.
block_a_merge:
  call void @block_a_merge_func()
  br label %loop

; Loop exit.
exit:
  call void @exit()
  ret void
}

; CodeGen should move the CFG islands that are part of the loop but don't
; conveniently fit anywhere so that they are at least contiguous with the
; loop.
;
; Expected layout, per the directives below: the entry code jumps to the
; header label (.LBB4_1), the align directive follows that jump, and the three
; island blocks (.LBB4_7, .LBB4_8, .LBB4_9), each ending in a jump to the
; header, come next, followed by the latch (.LBB4_5) directly before the
; header.

; CHECK: cfg_islands:
;      CHECK:   jmp     .LBB4_1
; CHECK-NEXT:   align
; CHECK-NEXT: .LBB4_7:
; CHECK-NEXT:   call    bar100
; CHECK-NEXT:   jmp     .LBB4_1
; CHECK-NEXT: .LBB4_8:
; CHECK-NEXT:   call    bar101
; CHECK-NEXT:   jmp     .LBB4_1
; CHECK-NEXT: .LBB4_9:
; CHECK-NEXT:   call    bar102
; CHECK-NEXT:   jmp     .LBB4_1
; CHECK-NEXT: .LBB4_5:
; CHECK-NEXT:   call    loop_latch
; CHECK-NEXT: .LBB4_1:
; CHECK-NEXT:   call    loop_header

define void @cfg_islands() nounwind {
entry:
  br label %loop

; Loop header: calls @loop_header, then goes to %block100 when @get returns a
; value below 100, otherwise to %bb.
loop:
  call void @loop_header()
  %t0 = call i32 @get()
  %t1 = icmp slt i32 %t0, 100
  br i1 %t1, label %block100, label %bb

; Goes to %block101 when @get returns a value below 101, otherwise to %bb1.
bb:
  %t2 = call i32 @get()
  %t3 = icmp slt i32 %t2, 101
  br i1 %t3, label %block101, label %bb1

; Goes to %block102 when @get returns a value below 102, otherwise to %bb2.
bb1:
  %t4 = call i32 @get()
  %t5 = icmp slt i32 %t4, 102
  br i1 %t5, label %block102, label %bb2

; Leaves the loop for %exit when @get returns a value below 103, otherwise
; continues to the latch in %bb3.
bb2:
  %t6 = call i32 @get()
  %t7 = icmp slt i32 %t6, 103
  br i1 %t7, label %exit, label %bb3

; Loop latch: calls @loop_latch and branches back to the header.
bb3:
  call void @loop_latch()
  br label %loop

; Loop exit. In the IR it sits between the latch and the island blocks below.
exit:
  call void @exit()
  ret void

; Island blocks: each calls its own function and branches back to the header.
block100:
  call void @bar100()
  br label %loop

block101:
  call void @bar101()
  br label %loop

block102:
  call void @bar102()
  br label %loop
}

; External functions called by the tests above. Only declarations are given
; here; each is marked nounwind, and only @get returns a value (i32).
declare void @bar99() nounwind
declare void @bar100() nounwind
declare void @bar101() nounwind
declare void @bar102() nounwind
declare void @body() nounwind
declare void @exit() nounwind
declare void @loop_header() nounwind
declare void @loop_latch() nounwind
declare i32 @get() nounwind
declare void @block_a_true_func() nounwind
declare void @block_a_false_func() nounwind
declare void @block_a_merge_func() nounwind