[X86] Fixes bug in build_vector v4x32 lowering

r222375 made some improvements to the build_vector lowering of v4x32 and v4xf32 into an insertps, but it missed a case where:

1. A single extracted element is used twice.
2. The lower of the two non-zero indexes should be preserved, and the higher should be used for the dest mask.

This caused a crash, since the source value for the insertps ended up uninitialized.

Differential Revision: http://reviews.llvm.org/D6377

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222635 91177308-0d34-0410-b5e6-96231b3b80d8
2025-07-25 13:24:46 +00:00 · 2014-11-23 13:09:06 +00:00
parent 71777d18ad
commit d539147834
2 changed files with 23 additions and 3 deletions
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -1145,6 +1145,23 @@ entry:
  ret <4 x float> %vecinit3
 }

+define <4 x float> @insertps_10(<4 x float> %A)
+{
+; X32-LABEL: insertps_10:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_10:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
+; X64-NEXT:    retq
+  %vecext = extractelement <4 x float> %A, i32 0
+  %vecbuild1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %vecext, i32 0
+  %vecbuild2 = insertelement <4 x float> %vecbuild1, float %vecext, i32 2
+  ret <4 x float> %vecbuild2
+}
+
 define <4 x float> @build_vector_to_shuffle_1(<4 x float> %A) {
 ; X32-LABEL: build_vector_to_shuffle_1:
 ; X32:       ## BB#0: