diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 17e91a6efb0..9a37ec53277 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5576,31 +5576,43 @@ let Predicates = [HasAVX] in { (VPMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), (VPMOVSXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))), + (VPMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), (VPMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), (VPMOVSXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))), + (VPMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), (VPMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), (VPMOVSXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))), + (VPMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), (VPMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), (VPMOVZXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))), + (VPMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), (VPMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), (VPMOVZXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))), + (VPMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), (VPMOVZXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), (VPMOVZXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))), + (VPMOVZXDQrm addr:$src)>; } let Predicates = [UseSSE41] in { @@ -5609,31 +5621,43 @@ let Predicates = [UseSSE41] in { (PMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), (PMOVSXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))), + (PMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), (PMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), (PMOVSXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))), + (PMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), (PMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), (PMOVSXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))), + (PMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), (PMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), (PMOVZXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))), + (PMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), (PMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), (PMOVZXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))), + (PMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), (PMOVZXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), (PMOVZXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))), + (PMOVZXDQrm addr:$src)>; } let Predicates = [HasAVX2] in { diff --git a/test/CodeGen/X86/pmovext.ll b/test/CodeGen/X86/pmovext.ll new file mode 100644 index 00000000000..c4530465c78 --- /dev/null +++ b/test/CodeGen/X86/pmovext.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s + +; rdar://11897677 + +;CHECK: intrin_pmov +;CHECK: pmovzxbw (%rsi), %xmm0 +;CHECK-NEXT: movdqu +;CHECK-NEXT: ret +define void @intrin_pmov(i16* noalias %dest, i8* noalias %src) nounwind uwtable ssp { + %1 = bitcast i8* %src to <2 x i64>* + %2 = load <2 x i64>* %1, align 16 + %3 = bitcast <2 x i64> %2 to <16 x i8> + %4 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %3) nounwind + %5 = bitcast i16* %dest to i8* + %6 = bitcast <8 x i16> %4 to <16 x i8> + tail call void @llvm.x86.sse2.storeu.dq(i8* %5, <16 x i8> %6) nounwind + ret void +} + +declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone + +declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind