mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-31 08:16:47 +00:00 
			
		
		
		
	Aside from a few minor latency corrections, the major change here is a new hazard recognizer which focuses on better dispatch-group formation on the POWER7. As with the PPC970's hazard recognizer, the most important thing it does is avoid load-after-store hazards within the same dispatch group. It uses the POWER7's special dispatch-group-terminating nop instruction (instead of inserting multiple regular nop instructions). This new hazard recognizer makes use of the scheduling dependency graph itself, built using alias-analysis (AA) information, to robustly detect the possibility of load-after-store hazards. Significant test-suite performance changes (the error bars are 99.5% confidence intervals based on 5 test-suite runs both with and without the change -- speedups are negative): Speedups: MultiSource/Benchmarks/FreeBench/pcompress2/pcompress2 -0.55171% +/- 0.333168%; MultiSource/Benchmarks/TSVC/CrossingThresholds-dbl/CrossingThresholds-dbl -17.5576% +/- 14.598%; MultiSource/Benchmarks/TSVC/Reductions-dbl/Reductions-dbl -29.5708% +/- 7.09058%; MultiSource/Benchmarks/TSVC/Reductions-flt/Reductions-flt -34.9471% +/- 11.4391%; SingleSource/Benchmarks/BenchmarkGame/puzzle -25.1347% +/- 11.0104%; SingleSource/Benchmarks/Misc/flops-8 -17.7297% +/- 9.79061%; SingleSource/Benchmarks/Shootout-C++/ary3 -35.5018% +/- 23.9458%; SingleSource/Regression/C/uint64_to_float -56.3165% +/- 25.4234%; SingleSource/UnitTests/Vectorizer/gcc-loops -18.5309% +/- 6.8496%. Regressions: MultiSource/Benchmarks/ASCI_Purple/SMG2000/smg2000 18.351% +/- 12.156%; SingleSource/Benchmarks/Shootout-C++/methcall 27.3086% +/- 14.4733%. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197099 91177308-0d34-0410-b5e6-96231b3b80d8
		
			
				
	
	
		
			32 lines
		
	
	
		
			888 B
		
	
	
	
		
			LLVM
		
	
	
	
	
	
			
		
		
	
	
			32 lines
		
	
	
		
			888 B
		
	
	
	
		
			LLVM
		
	
	
	
	
	
| ; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
 | |
| target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" ; leading "E" selects a big-endian layout; "p:64:64:64" gives 64-bit pointers
 | |
| target triple = "powerpc64-unknown-linux-gnu" ; 64-bit PowerPC, Linux/GNU environment
 | |
| ; Codegen test for the pwr7 CPU model: the expected output listed inside @foo requires an "ori 2, 2, 0" between each store and the load that follows it.
 | |
| ; Function Attrs: nounwind
 | |
| define void @foo(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c, float* nocapture %d) #0 {
 | |
| ; Copies *b -> *a, then *c -> *b, then *a -> *d; every access is a 4-byte-aligned float load or store.
 | |
| ; CHECK-LABEL: @foo
 | |
| ; None of the pointer arguments is marked noalias, so each load below may read memory written by an earlier store.
 | |
| entry:
 | |
|   %0 = load float* %b, align 4 ; %0 = *b
 | |
|   store float %0, float* %a, align 4 ; *a = %0
 | |
|   %1 = load float* %c, align 4 ; %1 = *c (load directly after a store)
 | |
|   store float %1, float* %b, align 4 ; *b = %1
 | |
|   %2 = load float* %a, align 4 ; %2 = *a (re-reads the location written by the first store)
 | |
|   store float %2, float* %d, align 4 ; *d = %2
 | |
|   ret void
 | |
| ; Expected output: three lfs/stfs pairs with "ori 2, 2, 0" after the first two stores, then blr. NOTE(review): "ori 2, 2, 0" is presumably the POWER7 dispatch-group-terminating nop, and base registers 3-6 presumably hold %a-%d -- confirm against the PowerPC backend/ABI.
 | |
| ; CHECK: lfs [[REG1:[0-9]+]], 0(4)
 | |
| ; CHECK: stfs [[REG1]], 0(3)
 | |
| ; CHECK: ori 2, 2, 0
 | |
| ; CHECK: lfs [[REG2:[0-9]+]], 0(5)
 | |
| ; CHECK: stfs [[REG2]], 0(4)
 | |
| ; CHECK: ori 2, 2, 0
 | |
| ; CHECK: lfs [[REG3:[0-9]+]], 0(3)
 | |
| ; CHECK: stfs [[REG3]], 0(6)
 | |
| ; CHECK: blr
 | |
| }
 | |
| 
 | |
| attributes #0 = { nounwind } ; attribute group #0, the group referenced by @foo's definition
 | |
| 
 |