Use movq to move low half of XMM register and zero-extend the rest.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@50874 91177308-0d34-0410-b5e6-96231b3b80d8
Evan Cheng 2008-05-08 22:35:02 +00:00
parent 748118d8aa
commit fd17f42bab
2 changed files with 12 additions and 1 deletion
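For context: on SSE2, a register-to-register movq copies the low 64 bits of the source XMM register and zeroes bits 127:64 of the destination, so a single instruction implements "keep the low element, zero-extend the rest". Below is a minimal IR sketch, not part of the commit, of the v2f64 case the new pattern targets; the function name and RUN line are illustrative, modeled on the test added below.

; Illustrative sketch (hypothetical test, not in this commit): the low
; double is kept and the upper double is zeroed, which should now be
; selected as a single movq.
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
define <2 x double> @zext_low(<2 x double> %v) nounwind {
	%lo = extractelement <2 x double> %v, i32 0
	%r = insertelement <2 x double> zeroinitializer, double %lo, i32 0
	ret <2 x double> %r
}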


@@ -2916,7 +2916,8 @@ let AddedComplexity = 15 in
 def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src,
                   MOVL_shuffle_mask)),
           (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v2f64 (X86zvmovl (v2f64 VR128:$src))),
+          (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
 // FIXME: Temporary workaround since 2-wide shuffle is broken.
 def : Pat<(int_x86_sse2_movs_d VR128:$src1, VR128:$src2),
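Here X86zvmovl is the backend's "move the low vector element, zero the remaining elements" DAG node; the added pattern selects it, for v2f64, to MOVZPQILo2PQIrr, the register-to-register form of movq. The vector_shuffle-with-all-zeros pattern above it remains for shuffles that reach instruction selection in that form.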


@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+define i32 @t(float %X) nounwind {
+	%tmp11 = insertelement <4 x float> undef, float %X, i32 0
+	%tmp12 = insertelement <4 x float> %tmp11, float %X, i32 1
+	%tmp27 = insertelement <4 x float> %tmp12, float 0.000000e+00, i32 2
+	%tmp28 = insertelement <4 x float> %tmp27, float 0.000000e+00, i32 3
+	store <4 x float> %tmp28, <4 x float>* null, align 16
+	ret i32 0
+}
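The vector built by the test is <%X, %X, 0, 0>: the two copies of %X occupy the low 64 bits and the upper 64 bits are zero, so the backend can materialize the upper zeros with a single zero-extending movq rather than a separate zero vector and shuffle; the grep on the RUN line fails if that movq is not emitted.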