mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-01 02:33:44 +00:00
The PMOVZXWD family of functions extends narrow vector types to wide vector types.
It had patterns for zext-loading and extending. This commit adds patterns for loading a wide type, performing a bitcast, and extending. This is an odd pattern, but it is commonly used when writing code with intrinsics. rdar://11897677 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163995 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3c0e5c9ece
commit
638e4c13cb
@ -5576,31 +5576,43 @@ let Predicates = [HasAVX] in {
|
||||
(VPMOVSXBWrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
|
||||
(VPMOVSXBWrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
|
||||
(VPMOVSXBWrm addr:$src)>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
|
||||
(VPMOVSXWDrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
|
||||
(VPMOVSXWDrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
|
||||
(VPMOVSXWDrm addr:$src)>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
|
||||
(VPMOVSXDQrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
|
||||
(VPMOVSXDQrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
|
||||
(VPMOVSXDQrm addr:$src)>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
|
||||
(VPMOVZXBWrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
|
||||
(VPMOVZXBWrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
|
||||
(VPMOVZXBWrm addr:$src)>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
|
||||
(VPMOVZXWDrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
|
||||
(VPMOVZXWDrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
|
||||
(VPMOVZXWDrm addr:$src)>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
|
||||
(VPMOVZXDQrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
|
||||
(VPMOVZXDQrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
|
||||
(VPMOVZXDQrm addr:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [UseSSE41] in {
|
||||
@ -5609,31 +5621,43 @@ let Predicates = [UseSSE41] in {
|
||||
(PMOVSXBWrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
|
||||
(PMOVSXBWrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
|
||||
(PMOVSXBWrm addr:$src)>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
|
||||
(PMOVSXWDrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
|
||||
(PMOVSXWDrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
|
||||
(PMOVSXWDrm addr:$src)>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
|
||||
(PMOVSXDQrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
|
||||
(PMOVSXDQrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
|
||||
(PMOVSXDQrm addr:$src)>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
|
||||
(PMOVZXBWrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
|
||||
(PMOVZXBWrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
|
||||
(PMOVZXBWrm addr:$src)>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
|
||||
(PMOVZXWDrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
|
||||
(PMOVZXWDrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
|
||||
(PMOVZXWDrm addr:$src)>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
|
||||
(PMOVZXDQrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
|
||||
(PMOVZXDQrm addr:$src)>;
|
||||
def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
|
||||
(PMOVZXDQrm addr:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
|
22
test/CodeGen/X86/pmovext.ll
Normal file
22
test/CodeGen/X86/pmovext.ll
Normal file
@ -0,0 +1,22 @@
|
||||
; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
|
||||
|
||||
; rdar://11897677
|
||||
|
||||
;CHECK: intrin_pmov
|
||||
;CHECK: pmovzxbw (%rsi), %xmm0
|
||||
;CHECK-NEXT: movdqu
|
||||
;CHECK-NEXT: ret
|
||||
define void @intrin_pmov(i16* noalias %dest, i8* noalias %src) nounwind uwtable ssp {
|
||||
%1 = bitcast i8* %src to <2 x i64>*
|
||||
%2 = load <2 x i64>* %1, align 16
|
||||
%3 = bitcast <2 x i64> %2 to <16 x i8>
|
||||
%4 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %3) nounwind
|
||||
%5 = bitcast i16* %dest to i8*
|
||||
%6 = bitcast <8 x i16> %4 to <16 x i8>
|
||||
tail call void @llvm.x86.sse2.storeu.dq(i8* %5, <16 x i8> %6) nounwind
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
|
||||
|
||||
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
|
Loading…
x
Reference in New Issue
Block a user