mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-31 08:16:47 +00:00 
			
		
		
		
	Instead of expanding a packed shift into a sequence of scalar shifts, the backend now tries (when possible) to convert the vector shift into a vector multiply. Before this change, a shift of an MVT::v8i16 vector by a build_vector of constants was always scalarized into a long sequence of "vector extracts + scalar shifts + vector inserts". With this change, if there is SSE2 support, we emit a single vector multiply. This change also affects SSE4.1, AVX, and AVX2 shifts: (1) a shift of an MVT::v4i32 vector by a build_vector of non-uniform constants is now lowered, when possible, into a single SSE4.1 vector multiply; (2) a packed v16i16 shift left by a constant build_vector is now expanded, when possible, into a single AVX2 vpmullw. This change also improves the lowering of AVX512f vector shifts. Added test CodeGen/X86/vec_shift6.ll with some code examples that are affected by this change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201271 91177308-0d34-0410-b5e6-96231b3b80d8
		
			
				
	
	
		
			149 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			LLVM
		
	
	
	
	
	
			
		
		
	
	
			149 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			LLVM
		
	
	
	
	
	
| ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
 | |
| 
 | |
;;; Shift left

; Shifts every lane of an <8 x i32> left by the constant 2.
; Two vpslld instructions are expected (presumably one per 128-bit half of
; the 256-bit vector). The label anchor keeps these patterns from matching
; output that belongs to another function.
; CHECK-LABEL: _vshift00:
; CHECK: vpslld
; CHECK: vpslld
define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
 | |
| 
 | |
; Shifts every lane of a <16 x i16> left by the constant 2.
; Two vpsllw instructions are expected (presumably one per 128-bit half).
; The label anchor scopes the patterns to this function's output.
; CHECK-LABEL: _vshift01:
; CHECK: vpsllw
; CHECK: vpsllw
define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
 | |
| 
 | |
; Shifts every lane of a <4 x i64> left by the constant 2.
; Two vpsllq instructions are expected (presumably one per 128-bit half).
; The label anchor scopes the patterns to this function's output.
; CHECK-LABEL: _vshift02:
; CHECK: vpsllq
; CHECK: vpsllq
define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}
 | |
| 
 | |
;;; Logical Shift right

; Logically shifts every lane of an <8 x i32> right by the constant 2.
; Two vpsrld instructions are expected (presumably one per 128-bit half).
; The label anchor scopes the patterns to this function's output.
; CHECK-LABEL: _vshift03:
; CHECK: vpsrld
; CHECK: vpsrld
define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
 | |
| 
 | |
; Logically shifts every lane of a <16 x i16> right by the constant 2.
; Two vpsrlw instructions are expected (presumably one per 128-bit half).
; The label anchor scopes the patterns to this function's output.
; CHECK-LABEL: _vshift04:
; CHECK: vpsrlw
; CHECK: vpsrlw
define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
 | |
| 
 | |
; Logically shifts every lane of a <4 x i64> right by the constant 2.
; Two vpsrlq instructions are expected (presumably one per 128-bit half).
; The label anchor scopes the patterns to this function's output.
; CHECK-LABEL: _vshift05:
; CHECK: vpsrlq
; CHECK: vpsrlq
define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}
 | |
| 
 | |
;;; Arithmetic Shift right

; Arithmetically shifts every lane of an <8 x i32> right by the constant 2.
; Two vpsrad instructions are expected (presumably one per 128-bit half).
; The label anchor scopes the patterns to this function's output.
; CHECK-LABEL: _vshift06:
; CHECK: vpsrad
; CHECK: vpsrad
define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
 | |
| 
 | |
; Arithmetically shifts every lane of a <16 x i16> right by the constant 2.
; Two vpsraw instructions are expected (presumably one per 128-bit half).
; The label anchor scopes the patterns to this function's output.
; CHECK-LABEL: _vshift07:
; CHECK: vpsraw
; CHECK: vpsraw
define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
 | |
| 
 | |
; Arithmetically shifts every lane of a <32 x i8> right by the constant 2.
; The expected sequence (word shift, and, xor, byte subtract) appears twice,
; presumably once per 128-bit half. The unprefixed mnemonics also match
; their v-prefixed forms because the patterns are substring matches.
; The label anchor scopes the patterns to this function's output.
; CHECK-LABEL: _vshift09:
; CHECK: vpsrlw
; CHECK: pand
; CHECK: pxor
; CHECK: psubb
; CHECK: vpsrlw
; CHECK: pand
; CHECK: pxor
; CHECK: psubb
define <32 x i8> @vshift09(<32 x i8> %a) nounwind readnone {
  %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
 | |
| 
 | |
; Arithmetically shifts every lane of a <32 x i8> right by 7, which leaves
; each lane filled with copies of its sign bit. The expected code is a pxor
; followed by two pcmpgtb instructions rather than any shift.
; The label anchor scopes the patterns to this function's output.
; CHECK-LABEL: _vshift10:
; CHECK: pxor
; CHECK: pcmpgtb
; CHECK: pcmpgtb
define <32 x i8> @vshift10(<32 x i8> %a) nounwind readnone {
  %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %s
}
 | |
| 
 | |
; Logically shifts every lane of a <32 x i8> right by the constant 2.
; The expected sequence (word shift followed by an and) appears twice,
; presumably once per 128-bit half.
; The label anchor scopes the patterns to this function's output.
; CHECK-LABEL: _vshift11:
; CHECK: vpsrlw
; CHECK: pand
; CHECK: vpsrlw
; CHECK: pand
define <32 x i8> @vshift11(<32 x i8> %a) nounwind readnone {
  %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
 | |
| 
 | |
; Shifts every lane of a <32 x i8> left by the constant 2.
; The expected sequence (word shift followed by an and) appears twice,
; presumably once per 128-bit half.
; The label anchor scopes the patterns to this function's output.
; CHECK-LABEL: _vshift12:
; CHECK: vpsllw
; CHECK: pand
; CHECK: vpsllw
; CHECK: pand
define <32 x i8> @vshift12(<32 x i8> %a) nounwind readnone {
  %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
 | |
| 
 | |
;;; Support variable shifts

; Shifts a splat of 1 left by a per-lane variable amount taken from %a.
; The expected code shifts by the immediate 23 twice, with an extract of the
; upper 128-bit half in between, and then returns.
; NOTE(review): the shift by 23 presumably places the amount in the exponent
; field of a float; confirm against the X86 lowering.
; The label directive matches the full "_vshift08:" symbol definition, which
; also bounds the patterns below to this function.
; CHECK-LABEL: _vshift08:
; CHECK: vpslld $23
; CHECK: vextractf128 $1
; CHECK: vpslld $23
; CHECK: ret
define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
  %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
  ret <8 x i32> %bitop
}
 | |
| 
 | |
; PR15141
; Shifts a <4 x i32> left by the non-uniform constants <0, 1, 2, 4>.
; The expected code is a single vpmulld, with no vpsll* shift and no
; vcvttps2dq conversion emitted before it.
; Using a label directive bounds the negative patterns to this function.
; CHECK-LABEL: _vshift13:
; CHECK-NOT: vpsll
; CHECK-NOT: vcvttps2dq
; CHECK: vpmulld
define <4 x i32> @vshift13(<4 x i32> %in) {
  %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i32> %T
}
 | |
| 
 | |
;;; Uses shifts for sign extension

; Truncates a <16 x i16> to <16 x i8> and sign-extends it back to i16.
; The expected code is a shift-left / arithmetic-shift-right pair emitted
; twice, followed by a vinsertf128 that rebuilds the 256-bit result.
; The label directive matches the "_sext_v16i16:" symbol definition, so the
; patterns cannot anchor on another mention of the symbol name.
; CHECK-LABEL: _sext_v16i16:
; CHECK: vpsllw
; CHECK: vpsraw
; CHECK: vpsllw
; CHECK: vpsraw
; CHECK: vinsertf128
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}
 | |
| 
 | |
; Truncates an <8 x i32> to <8 x i16> and sign-extends it back to i32.
; The expected code is a shift-left / arithmetic-shift-right pair emitted
; twice, followed by a vinsertf128 that rebuilds the 256-bit result.
; The label directive matches the "_sext_v8i32:" symbol definition, so the
; patterns cannot anchor on another mention of the symbol name.
; CHECK-LABEL: _sext_v8i32:
; CHECK: vpslld
; CHECK: vpsrad
; CHECK: vpslld
; CHECK: vpsrad
; CHECK: vinsertf128
define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}
 |