llvm-6502/test/CodeGen/X86/pr15267.ll
Simon Pilgrim 87d1836793 [X86][SSE] Improve support for 128-bit vector sign extension
This patch improves support for sign extension of the lower lanes of integer vectors by making use of the SSE41 pmovsx* sign-extension instructions where possible, and by optimizing the sign extension with shift sequences on pre-SSE41 targets (avoiding i64 arithmetic shifts, which would require scalarization).
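
A minimal sketch of the kind of pattern this targets (not part of the patch or the test below; the function name and the lowering notes in the comments are illustrative assumptions):

  define <4 x i32> @sext_lo_v4i8_to_v4i32(<16 x i8> %a) nounwind {
    %lo = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    %ext = sext <4 x i8> %lo to <4 x i32>
    ; SSE41+:    a single pmovsxbd of the low 4 bytes
    ; pre-SSE41: punpcklbw/punpcklwd to widen, then pslld $24 / psrad $24
    ret <4 x i32> %ext
  }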

It converts SIGN_EXTEND nodes to SIGN_EXTEND_VECTOR_INREG where necessary, which matches the pmovsx* instructions more closely than the default approach of using SIGN_EXTEND_INREG, which splits the operation (into an ANY_EXTEND lowered to a shuffle, followed by shifts) and makes instruction matching difficult during lowering. The necessary support for SIGN_EXTEND_VECTOR_INREG has been added to the DAGCombiner.
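
As a sketch of why staying in-vector matters before SSE41 (a hypothetical example, not from this test): x86 has no packed 64-bit arithmetic shift until AVX-512, so a v2i64 sign extension that cannot use pmovsxdq is better served by a dword shift/shuffle sequence than by scalarized sarq:

  define <2 x i64> @sext_lo_v2i32_to_v2i64(<4 x i32> %a) nounwind {
    %lo = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
    %ext = sext <2 x i32> %lo to <2 x i64>
    ; expected expansion (roughly):
    ;   movdqa    %xmm0, %xmm1
    ;   psrad     $31, %xmm1      ; per-lane sign mask of each i32
    ;   punpckldq %xmm1, %xmm0    ; interleave value:sign into two i64 lanes
    ret <2 x i64> %ext
  }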

Differential Revision: http://reviews.llvm.org/D9848

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237885 91177308-0d34-0410-b5e6-96231b3b80d8
2015-05-21 10:05:03 +00:00

; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx | FileCheck %s
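; Load of <4 x i3>: the packed bits are zero-extended (movzwl), then each 3-bit element is extracted with shift/mask and inserted with pinsrd.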
define <4 x i3> @test1(<4 x i3>* %in) nounwind {
%ret = load <4 x i3>, <4 x i3>* %in, align 1
ret <4 x i3> %ret
}
; CHECK-LABEL: test1
; CHECK: movzwl
; CHECK: shrl $3
; CHECK: andl $7
; CHECK: andl $7
; CHECK: vmovd
; CHECK: pinsrd $1
; CHECK: shrl $6
; CHECK: andl $7
; CHECK: pinsrd $2
; CHECK: shrl $9
; CHECK: andl $7
; CHECK: pinsrd $3
; CHECK: ret
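; Load of <4 x i1>: the packed bits are zero-extended (movzbl), then each bit is extracted with shift/mask and inserted with pinsrd.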
define <4 x i1> @test2(<4 x i1>* %in) nounwind {
%ret = load <4 x i1>, <4 x i1>* %in, align 1
ret <4 x i1> %ret
}
; CHECK-LABEL: test2
; CHECK: movzbl
; CHECK: shrl
; CHECK: andl $1
; CHECK: andl $1
; CHECK: vmovd
; CHECK: pinsrd $1
; CHECK: shrl $2
; CHECK: andl $1
; CHECK: pinsrd $2
; CHECK: shrl $3
; CHECK: andl $1
; CHECK: pinsrd $3
; CHECK: ret
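; Load of <4 x i1> sign-extended to <4 x i64>: each bit is sign-extended in a scalar register (shlq/sarq), packed with vpinsrd, widened in pairs with vpmovsxdq, and combined with vinsertf128.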
define <4 x i64> @test3(<4 x i1>* %in) nounwind {
%wide.load35 = load <4 x i1>, <4 x i1>* %in, align 1
%sext = sext <4 x i1> %wide.load35 to <4 x i64>
ret <4 x i64> %sext
}
; CHECK-LABEL: test3
; CHECK: movzbl
; CHECK: movq
; CHECK: shlq
; CHECK: sarq
; CHECK: movq
; CHECK: shlq
; CHECK: sarq
; CHECK: vmovd
; CHECK: vpinsrd
; CHECK: movq
; CHECK: shlq
; CHECK: sarq
; CHECK: vpinsrd
; CHECK: shlq
; CHECK: sarq
; CHECK: vpinsrd
; CHECK: vpmovsxdq
; CHECK: vmovd
; CHECK: vpinsrd
; CHECK: vpmovsxdq
; CHECK: vinsertf128
; CHECK: ret
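; Load of <16 x i4>: each nibble is extracted with a scalar shift/mask and inserted with vpinsrb.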
define <16 x i4> @test4(<16 x i4>* %in) nounwind {
%ret = load <16 x i4>, <16 x i4>* %in, align 1
ret <16 x i4> %ret
}
; CHECK-LABEL: test4
; CHECK: movl
; CHECK-NEXT: shrl
; CHECK-NEXT: andl
; CHECK-NEXT: movl
; CHECK-NEXT: andl
; CHECK-NEXT: vmovd
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movl
; CHECK-NEXT: shrl
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movl
; CHECK-NEXT: shrl
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movl
; CHECK-NEXT: shrl
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movl
; CHECK-NEXT: shrl
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movl
; CHECK-NEXT: shrl
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movl
; CHECK-NEXT: shrl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movq
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movq
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movq
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movq
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movq
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movq
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movq
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: shrq
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: retq