mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-11-04 05:17:07 +00:00 
			
		
		
		
	This patch just uses a peephole test for "add; compare; branch" sequences within a single block. The IR optimizers already convert loops to decrement-and-branch-on-nonzero form in some cases, so even this simplistic test triggers many times during a clang bootstrap and projects/test-suite run. It looks like there are still cases where we need to more strongly prefer branches on nonzero though. E.g. I saw a case where a loop that started out with a check for 0 ended up with a check for -1. I'll try to look at that sometime. I ended up adding the Reference class because MachineInstr::readsRegister() doesn't check for subregisters (by design, as far as I could tell). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187723 91177308-0d34-0410-b5e6-96231b3b80d8
		
			
				
	
	
		
			125 lines
		
	
	
		
			2.9 KiB
		
	
	
	
		
			LLVM
		
	
	
	
	
	
			
		
		
	
	
			125 lines
		
	
	
		
			2.9 KiB
		
	
	
	
		
			LLVM
		
	
	
	
	
	
; Test loop tuning.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s

; Test that strength reduction is applied to addresses with a scale factor,
; but that indexed addressing can still be used.
;
; The loop stores %a to %dest[%index] for %index = 0..99.  The expectations
; below require that no SLLG is emitted between the function label and the
; ST, and that the ST addresses memory through two registers with a zero
; displacement.
define void @f1(i32 *%dest, i32 %a) {
; CHECK-LABEL: f1:
; CHECK-NOT: sllg
; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}})
; CHECK: br %r14
entry:
  br label %loop

loop:
  %index = phi i64 [ 0, %entry ], [ %next, %loop ]
  %ptr = getelementptr i32 *%dest, i64 %index
  store i32 %a, i32 *%ptr
  %next = add i64 %index, 1
  %cmp = icmp ne i64 %next, 100
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

; Test a loop that should be converted into dbr form and then use BRCT.
;
; %count steps upwards from 0 and the loop exits once %next equals 100; %next
; is used only by the phi and by the exit compare in %loop.next.  The
; expectations below want the counter register loaded with 100 by LHI ahead
; of the %loop label, and a BRCT on that same register branching back to
; that label.
define void @f2(i32 *%src, i32 *%dest) {
; CHECK-LABEL: f2:
; CHECK: lhi [[REG:%r[0-5]]], 100
; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop
; CHECK: brct [[REG]], [[LABEL]]
; CHECK: br %r14
entry:
  br label %loop

loop:
  %count = phi i32 [ 0, %entry ], [ %next, %loop.next ]
  %next = add i32 %count, 1
  %val = load volatile i32 *%src
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %loop.next, label %loop.store

; Reached only when the loaded value is nonzero.
loop.store:
  %add = add i32 %val, 1
  store volatile i32 %add, i32 *%dest
  br label %loop.next

loop.next:
  %cont = icmp ne i32 %next, 100
  br i1 %cont, label %loop, label %exit

exit:
  ret void
}

; Like f2, but for BRCTG.
;
; Same loop shape as f2 with an i64 counter and i64 data.  The expectations
; below want the counter register loaded with 100 by LGHI ahead of the %loop
; label, and a BRCTG on that same register branching back to that label.
define void @f3(i64 *%src, i64 *%dest) {
; CHECK-LABEL: f3:
; CHECK: lghi [[REG:%r[0-5]]], 100
; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop
; CHECK: brctg [[REG]], [[LABEL]]
; CHECK: br %r14
entry:
  br label %loop

loop:
  %count = phi i64 [ 0, %entry ], [ %next, %loop.next ]
  %next = add i64 %count, 1
  %val = load volatile i64 *%src
  %cmp = icmp eq i64 %val, 0
  br i1 %cmp, label %loop.next, label %loop.store

; Reached only when the loaded value is nonzero.
loop.store:
  %add = add i64 %val, 1
  store volatile i64 %add, i64 *%dest
  br label %loop.next

loop.next:
  %cont = icmp ne i64 %next, 100
  br i1 %cont, label %loop, label %exit

exit:
  ret void
}

; Test a loop with a 64-bit decremented counter in which the 32-bit
; low part of the counter is used after the decrement.  This is an example
; of a subregister use being the only thing that blocks a conversion to BRCTG.
;
; %next is both compared against zero and masked to its low 32 bits, which
; are combined with %val (shifted into the high half) and stored to %dest2.
; The expectations below want an explicit AGHI ..., -1 on the counter, an LR
; copying the counter register into the register that the STG stores, and a
; JNE closing the loop.
define void @f4(i32 *%src, i32 *%dest, i64 *%dest2, i64 %count) {
; CHECK-LABEL: f4:
; CHECK: aghi [[REG:%r[0-5]]], -1
; CHECK: lr [[REG2:%r[0-5]]], [[REG]]
; CHECK: stg [[REG2]],
; CHECK: jne {{\..*}}
; CHECK: br %r14
entry:
  br label %loop

loop:
  %left = phi i64 [ %count, %entry ], [ %next, %loop.next ]
  store volatile i64 %left, i64 *%dest2
  %val = load volatile i32 *%src
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %loop.next, label %loop.store

; Reached only when the loaded value is nonzero.
loop.store:
  %add = add i32 %val, 1
  store volatile i32 %add, i32 *%dest
  br label %loop.next

loop.next:
  %next = add i64 %left, -1
  ; Build (%val << 32) | (low 32 bits of %next).
  %ext = zext i32 %val to i64
  %shl = shl i64 %ext, 32
  %and = and i64 %next, 4294967295
  %or = or i64 %shl, %and
  store volatile i64 %or, i64 *%dest2
  %cont = icmp ne i64 %next, 0
  br i1 %cont, label %loop, label %exit

exit:
  ret void
}