mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-15 23:31:37 +00:00
02c2ecf9f1
That can usually be lowered efficiently and is common in sandybridge code. It would be nice to do this in DAGCombiner but we can't insert arbitrary BUILD_VECTORs this late. Fixes PR15462. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176634 91177308-0d34-0410-b5e6-96231b3b80d8
148 lines
4.3 KiB
LLVM
148 lines
4.3 KiB
LLVM
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
|
|
|
|
;;; Shift left
|
|
; vshift00: shift every i32 lane of a <8 x i32> left by the constant 2.
; The label check below pins the following matches to this function's
; output instead of letting them match any earlier or later function
; (same convention as the _vshift13 label check further down in this file).
; Two vpslld matches are required -- presumably one per 128-bit half.
; CHECK: _vshift00:
; CHECK: vpslld
; CHECK: vpslld
define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
|
|
|
|
; vshift01: shift every i16 lane of a <16 x i16> left by the constant 2.
; The label check anchors the matches to this function's output
; (same convention as the _vshift13 label check further down in this file).
; Two vpsllw matches are required -- presumably one per 128-bit half.
; CHECK: _vshift01:
; CHECK: vpsllw
; CHECK: vpsllw
define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
|
|
|
|
; vshift02: shift every i64 lane of a <4 x i64> left by the constant 2.
; The label check anchors the matches to this function's output
; (same convention as the _vshift13 label check further down in this file).
; Two vpsllq matches are required -- presumably one per 128-bit half.
; CHECK: _vshift02:
; CHECK: vpsllq
; CHECK: vpsllq
define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}
|
|
|
|
;;; Logical Shift right
|
|
; vshift03: logical right shift of every i32 lane of a <8 x i32> by 2.
; The label check anchors the matches to this function's output
; (same convention as the _vshift13 label check further down in this file).
; Two vpsrld matches are required -- presumably one per 128-bit half.
; CHECK: _vshift03:
; CHECK: vpsrld
; CHECK: vpsrld
define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
|
|
|
|
; vshift04: logical right shift of every i16 lane of a <16 x i16> by 2.
; The label check anchors the matches to this function's output
; (same convention as the _vshift13 label check further down in this file).
; Two vpsrlw matches are required -- presumably one per 128-bit half.
; CHECK: _vshift04:
; CHECK: vpsrlw
; CHECK: vpsrlw
define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
|
|
|
|
; vshift05: logical right shift of every i64 lane of a <4 x i64> by 2.
; The label check anchors the matches to this function's output
; (same convention as the _vshift13 label check further down in this file).
; Two vpsrlq matches are required -- presumably one per 128-bit half.
; CHECK: _vshift05:
; CHECK: vpsrlq
; CHECK: vpsrlq
define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}
|
|
|
|
;;; Arithmetic Shift right
|
|
; vshift06: arithmetic right shift of every i32 lane of a <8 x i32> by 2.
; The label check anchors the matches to this function's output
; (same convention as the _vshift13 label check further down in this file).
; Two vpsrad matches are required -- presumably one per 128-bit half.
; CHECK: _vshift06:
; CHECK: vpsrad
; CHECK: vpsrad
define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
|
|
|
|
; vshift07: arithmetic right shift of every i16 lane of a <16 x i16> by 2.
; The label check anchors the matches to this function's output
; (same convention as the _vshift13 label check further down in this file).
; Two vpsraw matches are required -- presumably one per 128-bit half.
; CHECK: _vshift07:
; CHECK: vpsraw
; CHECK: vpsraw
define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
|
|
|
|
; vshift09: arithmetic right shift of every i8 lane of a <32 x i8> by 2.
; The label check anchors the matches to this function's output; without it
; the generic vpsrlw/pand patterns could be satisfied by a neighbouring
; function (same convention as the _vshift13 label check further down).
; The expected sequence is a 16-bit logical shift followed by pand, pxor and
; psubb, and it must appear twice -- presumably once per 128-bit half.
; CHECK: _vshift09:
; CHECK: vpsrlw
; CHECK: pand
; CHECK: pxor
; CHECK: psubb
; CHECK: vpsrlw
; CHECK: pand
; CHECK: pxor
; CHECK: psubb
define <32 x i8> @vshift09(<32 x i8> %a) nounwind readnone {
  %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
|
|
|
|
; vshift10: arithmetic right shift of every i8 lane of a <32 x i8> by 7,
; i.e. each lane is replaced by copies of its sign bit.
; The label check anchors the matches to this function's output
; (same convention as the _vshift13 label check further down in this file).
; Expected: one pxor followed by two pcmpgtb -- presumably a compare against
; zero per 128-bit half rather than an actual shift.
; CHECK: _vshift10:
; CHECK: pxor
; CHECK: pcmpgtb
; CHECK: pcmpgtb
define <32 x i8> @vshift10(<32 x i8> %a) nounwind readnone {
  %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %s
}
|
|
|
|
; vshift11: logical right shift of every i8 lane of a <32 x i8> by 2.
; The label check anchors the matches to this function's output
; (same convention as the _vshift13 label check further down in this file).
; Expected twice: a 16-bit logical right shift followed by pand --
; presumably the mask clears bits shifted in from the adjacent byte.
; CHECK: _vshift11:
; CHECK: vpsrlw
; CHECK: pand
; CHECK: vpsrlw
; CHECK: pand
define <32 x i8> @vshift11(<32 x i8> %a) nounwind readnone {
  %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
|
|
|
|
; vshift12: left shift of every i8 lane of a <32 x i8> by 2.
; The label check anchors the matches to this function's output
; (same convention as the _vshift13 label check further down in this file).
; Expected twice: a 16-bit left shift followed by pand -- presumably the
; mask clears bits shifted in from the adjacent byte.
; CHECK: _vshift12:
; CHECK: vpsllw
; CHECK: pand
; CHECK: vpsllw
; CHECK: pand
define <32 x i8> @vshift12(<32 x i8> %a) nounwind readnone {
  %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
|
|
|
|
;;; Support variable shifts
|
|
; vshift08: variable shift -- each result lane is 1 shifted left by the
; corresponding lane of %a.
; Expected: the upper half is extracted with vextractf128 $1 and each half
; uses an immediate shift by 23 (presumably moving the shift amount into a
; float exponent field; confirm against the X86 lowering).
; CHECK: _vshift08
; CHECK: vextractf128 $1
; CHECK: vpslld $23
; CHECK: vpslld $23
define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
  %pow2 = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
  ret <8 x i32> %pow2
}
|
|
|
|
; PR15141
|
|
; vshift13: left shift of a <4 x i32> by a non-uniform constant vector
; <0, 1, 2, 4>. No vpsll* instruction may appear between the function label
; and vcvttps2dq, and vpmulld must be on the line right after vcvttps2dq.
; CHECK: _vshift13:
; CHECK-NOT: vpsll
; CHECK: vcvttps2dq
; CHECK-NEXT: vpmulld
define <4 x i32> @vshift13(<4 x i32> %in) {
  %scaled = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i32> %scaled
}
|
|
|
|
;;; Uses shifts for sign extension
|
|
; sext_v16i16: truncate each i16 lane to i8 and sign-extend it back to i16,
; i.e. sign-extend the low byte of every lane in place.
; Expected: a left shift / arithmetic right shift pair, twice, and then the
; result is assembled with vinsertf128.
; CHECK: _sext_v16i16
; CHECK: vpsllw
; CHECK: vpsraw
; CHECK: vpsllw
; CHECK: vpsraw
; CHECK: vinsertf128
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
  %narrow = trunc <16 x i16> %a to <16 x i8>
  %wide = sext <16 x i8> %narrow to <16 x i16>
  ret <16 x i16> %wide
}
|
|
|
|
; sext_v8i32: truncate each i32 lane to i16 and sign-extend it back to i32,
; i.e. sign-extend the low 16 bits of every lane in place.
; Expected: a left shift / arithmetic right shift pair, twice, and then the
; result is assembled with vinsertf128.
; CHECK: _sext_v8i32
; CHECK: vpslld
; CHECK: vpsrad
; CHECK: vpslld
; CHECK: vpsrad
; CHECK: vinsertf128
define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
  %narrow = trunc <8 x i32> %a to <8 x i16>
  %wide = sext <8 x i16> %narrow to <8 x i32>
  ret <8 x i32> %wide
}
|