; Test derived from llvm.org trunk r169791 (rdar://12760078): improved memset
; inline expansion on ARM — use multiple word stores and unaligned NEON
; vst1 stores (including overlapping stores for trailing bytes) instead of
; calling memset for small, constant-size fills.
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -pre-RA-sched=source -disable-post-ra | FileCheck %s

; A 12-byte, 8-byte-aligned zero memset should be expanded inline into three
; word stores of a single zero register, not a call to memset.
define void @t1(i8* nocapture %c) nounwind optsize {
entry:
; CHECK: t1:
; CHECK: movs r1, #0
; CHECK: str r1, [r0]
; CHECK: str r1, [r0, #4]
; CHECK: str r1, [r0, #8]
  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
  ret void
}
; A 26-byte zero memset of a byte-aligned stack buffer should use unaligned
; NEON stores: a 16-byte vst1.32 at the base plus an overlapping vst1.16 at
; offset 10 covers all 26 bytes without a libcall or byte-store loop.
define void @t2() nounwind ssp {
entry:
; CHECK: t2:
; CHECK: add.w r1, r0, #10
; CHECK: vmov.i32 {{q[0-9]+}}, #0x0
; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
; CHECK: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
  %buf = alloca [26 x i8], align 1
  %0 = getelementptr inbounds [26 x i8]* %buf, i32 0, i32 0
  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
  call void @something(i8* %0) nounwind
  ret void
}
; External escape function (keeps %buf alive) and the memset intrinsics
; exercised by @t1 (i64 length) and @t2 (i32 length).
declare void @something(i8*) nounwind
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind