From 97136c922ec4b492584cb91c1fc1cdcb40983ecf Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes
Date: Mon, 19 Sep 2011 23:36:50 +0000
Subject: [PATCH] Based on the small opt Zvi's patch was trying to achieve, eliminate 128-bit undef subvector insertion into a 256-bit vector

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140097 91177308-0d34-0410-b5e6-96231b3b80d8
---
 Review notes (free-form area, ignored when the patch is applied):
  * insert_subvector takes its position as an element number, so the high
    128-bit half of a 256-bit vector starts at element 4 for 32-bit
    elements, 2 for 64-bit, 8 for 16-bit and 16 for 8-bit elements. The
    v4i64/v4f64/v16i16/v32i8 patterns below use those indices instead of
    the 4 that was copied from the v8i32/v8f32 patterns.
  * The shufflevector mask operands in test @C were missing from the copy
    under review and have been reconstructed (low half taken from %v1,
    high half taken from the undef operand). TODO: confirm against the
    upstream test file.
  * The post-image blob id recorded for X86InstrSSE.td predates the
    element-number fix described above.

 lib/Target/X86/X86InstrSSE.td       | 15 +++++++++++++++
 test/CodeGen/X86/avx-vinsertf128.ll |  8 ++++++++
 2 files changed, 23 insertions(+)

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index bd43d0bcad3..fdf119e4767 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -157,6 +157,21 @@ def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)),
 def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)),
           (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
 
+// Inserting a 128-bit undef vector into the high part of a 256-bit
+// vector should return the 256-bit vector itself.
+def : Pat<(insert_subvector (v8i32 VR256:$src), undef, (i32 4)),
+          (v8i32 VR256:$src)>;
+def : Pat<(insert_subvector (v8f32 VR256:$src), undef, (i32 4)),
+          (v8f32 VR256:$src)>;
+def : Pat<(insert_subvector (v4i64 VR256:$src), undef, (i32 2)),
+          (v4i64 VR256:$src)>;
+def : Pat<(insert_subvector (v4f64 VR256:$src), undef, (i32 2)),
+          (v4f64 VR256:$src)>;
+def : Pat<(insert_subvector (v16i16 VR256:$src), undef, (i32 8)),
+          (v16i16 VR256:$src)>;
+def : Pat<(insert_subvector (v32i8 VR256:$src), undef, (i32 16)),
+          (v32i8 VR256:$src)>;
+
 // Implicitly promote a 32-bit scalar to a vector.
 def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
           (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll
index a6f258513a5..8813a7f6ab8 100644
--- a/test/CodeGen/X86/avx-vinsertf128.ll
+++ b/test/CodeGen/X86/avx-vinsertf128.ll
@@ -36,3 +36,11 @@ allocas:
   store <4 x i32> %blendAsInt.i503, <4 x i32>* undef, align 4
   ret void
 }
+
+; CHECK: _C
+; CHECK-NOT: vinsertf128 $1
+define <4 x i32> @C(<4 x i32> %v1) nounwind readonly {
+  %1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %2
+}