mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-21 19:32:16 +00:00
87d1836793
This patch improves support for sign extension of the lower lanes of vectors of integers by making use of the SSE41 pmovsx* sign extension instructions where possible, and by optimizing the sign extension by shifts on pre-SSE41 targets (avoiding the use of i64 arithmetic shifts, which require scalarization). It converts SIGN_EXTEND nodes to SIGN_EXTEND_VECTOR_INREG where necessary, which more closely matches the pmovsx* instruction than the default approach of using SIGN_EXTEND_INREG, which splits the operation (into an ANY_EXTEND lowered to a shuffle followed by shifts), making instruction matching difficult during lowering. Necessary support for SIGN_EXTEND_VECTOR_INREG has been added to the DAGCombiner. Differential Revision: http://reviews.llvm.org/D9848 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237885 91177308-0d34-0410-b5e6-96231b3b80d8
139 lines
2.6 KiB
LLVM
139 lines
2.6 KiB
LLVM
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx | FileCheck %s
|
|
|
|
; test1: read a vector of four 3-bit integers through %in (alignment 1)
; and hand it straight back to the caller.
define <4 x i3> @test1(<4 x i3>* %in) nounwind {
  ; Single load of the whole <4 x i3> value; no further IR-level processing.
  %vec = load <4 x i3>, <4 x i3>* %in, align 1
  ret <4 x i3> %vec
}
|
|
; CHECK-LABEL: test1
|
|
; CHECK: movzwl
|
|
; CHECK: shrl $3
|
|
; CHECK: andl $7
|
|
; CHECK: andl $7
|
|
; CHECK: vmovd
|
|
; CHECK: pinsrd $1
|
|
; CHECK: shrl $6
|
|
; CHECK: andl $7
|
|
; CHECK: pinsrd $2
|
|
; CHECK: shrl $9
|
|
; CHECK: andl $7
|
|
; CHECK: pinsrd $3
|
|
; CHECK: ret
|
|
|
|
; test2: read a vector of four 1-bit integers through %in (alignment 1)
; and return it unmodified.
define <4 x i1> @test2(<4 x i1>* %in) nounwind {
  ; Single load of the whole <4 x i1> value; the result is returned as-is.
  %bits = load <4 x i1>, <4 x i1>* %in, align 1
  ret <4 x i1> %bits
}
|
|
|
|
; CHECK-LABEL: test2
|
|
; CHECK: movzbl
|
|
; CHECK: shrl
|
|
; CHECK: andl $1
|
|
; CHECK: andl $1
|
|
; CHECK: vmovd
|
|
; CHECK: pinsrd $1
|
|
; CHECK: shrl $2
|
|
; CHECK: andl $1
|
|
; CHECK: pinsrd $2
|
|
; CHECK: shrl $3
|
|
; CHECK: andl $1
|
|
; CHECK: pinsrd $3
|
|
; CHECK: ret
|
|
|
|
; test3: read a vector of four 1-bit integers through %in (alignment 1),
; sign-extend every lane to 64 bits, and return the <4 x i64> result.
define <4 x i64> @test3(<4 x i1>* %in) nounwind {
  ; Load the narrow <4 x i1> source vector.
  %narrow = load <4 x i1>, <4 x i1>* %in, align 1
  ; Widen each i1 lane to i64 with sign extension.
  %wide = sext <4 x i1> %narrow to <4 x i64>
  ret <4 x i64> %wide
}
|
|
|
|
; CHECK-LABEL: test3
|
|
; CHECK: movzbl
|
|
; CHECK: movq
|
|
; CHECK: shlq
|
|
; CHECK: sarq
|
|
; CHECK: movq
|
|
; CHECK: shlq
|
|
; CHECK: sarq
|
|
; CHECK: vmovd
|
|
; CHECK: vpinsrd
|
|
; CHECK: movq
|
|
; CHECK: shlq
|
|
; CHECK: sarq
|
|
; CHECK: vpinsrd
|
|
; CHECK: shlq
|
|
; CHECK: sarq
|
|
; CHECK: vpinsrd
|
|
; CHECK: vpmovsxdq
|
|
; CHECK: vmovd
|
|
; CHECK: vpinsrd
|
|
; CHECK: vpmovsxdq
|
|
; CHECK: vinsertf128
|
|
; CHECK: ret
|
|
|
|
; test4: read a vector of sixteen 4-bit integers through %in (alignment 1)
; and return it unmodified.
define <16 x i4> @test4(<16 x i4>* %in) nounwind {
  ; Single load of the whole <16 x i4> value; the result is returned as-is.
  %nibbles = load <16 x i4>, <16 x i4>* %in, align 1
  ret <16 x i4> %nibbles
}
|
|
|
|
; CHECK-LABEL: test4
|
|
; CHECK: movl
|
|
; CHECK-NEXT: shrl
|
|
; CHECK-NEXT: andl
|
|
; CHECK-NEXT: movl
|
|
; CHECK-NEXT: andl
|
|
; CHECK-NEXT: vmovd
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: movl
|
|
; CHECK-NEXT: shrl
|
|
; CHECK-NEXT: andl
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: movl
|
|
; CHECK-NEXT: shrl
|
|
; CHECK-NEXT: andl
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: movl
|
|
; CHECK-NEXT: shrl
|
|
; CHECK-NEXT: andl
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: movl
|
|
; CHECK-NEXT: shrl
|
|
; CHECK-NEXT: andl
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: movl
|
|
; CHECK-NEXT: shrl
|
|
; CHECK-NEXT: andl
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: movl
|
|
; CHECK-NEXT: shrl
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: movq
|
|
; CHECK-NEXT: shrq
|
|
; CHECK-NEXT: andl
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: movq
|
|
; CHECK-NEXT: shrq
|
|
; CHECK-NEXT: andl
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: movq
|
|
; CHECK-NEXT: shrq
|
|
; CHECK-NEXT: andl
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: movq
|
|
; CHECK-NEXT: shrq
|
|
; CHECK-NEXT: andl
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: movq
|
|
; CHECK-NEXT: shrq
|
|
; CHECK-NEXT: andl
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: movq
|
|
; CHECK-NEXT: shrq
|
|
; CHECK-NEXT: andl
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: movq
|
|
; CHECK-NEXT: shrq
|
|
; CHECK-NEXT: andl
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: shrq
|
|
; CHECK-NEXT: vpinsrb
|
|
; CHECK-NEXT: retq
|