mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-30 16:17:05 +00:00 
			
		
		
		
	This provides more realistic costs for the insert/extractelement instructions (which are load/store pairs), accounts for the cheap unaligned Altivec load sequence, and for unaligned VSX load/stores.

	Bad news:
	- MultiSource/Applications/sgefa/sgefa - 35% slowdown (this will require more investigation)
	- SingleSource/Benchmarks/McGill/queens - 20% slowdown (we no longer vectorize this, but it was a constant store that was scalarized)
	- MultiSource/Benchmarks/FreeBench/pcompress2/pcompress2 - 2% slowdown

	Good news:
	- SingleSource/Benchmarks/Shootout/ary3 - 54% speedup
	- SingleSource/Benchmarks/Shootout-C++/ary - 40% speedup
	- MultiSource/Benchmarks/Ptrdist/ks/ks - 35% speedup
	- MultiSource/Benchmarks/FreeBench/neural/neural - 30% speedup
	- MultiSource/Benchmarks/TSVC/Symbolics-flt/Symbolics-flt - 20% speedup

	Unfortunately, estimating the costs of the stack-based scalarization sequences is hard, and adjusting these costs is like a game of whac-a-mole :( I'll revisit this again after we have better codegen for vector extloads and truncstores and unaligned load/stores.

	git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205658 91177308-0d34-0410-b5e6-96231b3b80d8
		
			
				
	
	
		
			22 lines
		
	
	
		
			532 B
		
	
	
	
		
			LLVM
		
	
	
	
	
	
			
		
		
	
	
			22 lines
		
	
	
		
			532 B
		
	
	
	
		
			LLVM
		
	
	
	
	
	
; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck %s

; Cost-model test for sign extension (sext) of i16 elements to i32 elements,
; analyzed for powerpc64 with -mcpu=pwr7 and VSX enabled (see the run line
; above). Each FileCheck directive pins the cost the analysis is expected to
; report for the sext instruction that immediately follows it.

target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; All operands are undef and none of the results are used; the function exists
; only to give the cost-model analysis one sext of each shape to price.
define void @exts() {

  ; Scalar i16 -> i32.
  ; CHECK: cost of 1 {{.*}} sext
  %v1 = sext i16 undef to i32

  ; Two-element vector.
  ; CHECK: cost of 1 {{.*}} sext
  %v2 = sext <2 x i16> undef to <2 x i32>

  ; Four-element vector.
  ; CHECK: cost of 1 {{.*}} sext
  %v3 = sext <4 x i16> undef to <4 x i32>

  ; Eight-element vector.
  ; NOTE(review): the much higher expected cost presumably comes from the
  ; <8 x i32> result (256 bits) not fitting a single vector register and being
  ; scalarized - confirm against the PowerPC cost model.
  ; CHECK: cost of 112 {{.*}} sext
  %v4 = sext <8 x i16> undef to <8 x i32>

  ret void
}