[avx] Add patterns for VINSERTF128rm.

This results in things such as vmovaps -96(%rbx), %xmm1 vinsertf128 $1, %xmm1, %ymm0, %ymm0 to be combined to vinsertf128 $1, -96(%rbx), %ymm0, %ymm0 rdar://10643481 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152762 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-12 07:37:34 +00:00 · 2012-03-15 00:45:30 +00:00 · 2012-03-15 00:45:30 +00:00 · 38ca0d78a2
commit 38ca0d78a2
parent ae029098a5
2 changed files with 25 additions and 0 deletions
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@ -7758,6 +7758,19 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                                   (i32 imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (alignedloadv4f32 addr:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rm VR256:$src1, addr:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (alignedloadv2f64 addr:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rm VR256:$src1, addr:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (alignedloadv2i64 addr:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rm VR256:$src1, addr:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
 }

 //===----------------------------------------------------------------------===//
--- a/test/CodeGen/X86/avx-vinsertf128.ll
+++ b/test/CodeGen/X86/avx-vinsertf128.ll
@ -104,3 +104,15 @@ ret <8 x i32> %res
 }
 declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone

+; rdar://10643481
+; CHECK: vinsertf128_combine
+define <8 x float> @vinsertf128_combine(float* nocapture %f) nounwind uwtable readonly ssp {
+; CHECK-NOT: vmovaps
+; CHECK: vinsertf128
+entry:
+  %add.ptr = getelementptr inbounds float* %f, i64 4
+  %0 = bitcast float* %add.ptr to <4 x float>*
+  %1 = load <4 x float>* %0, align 16
+  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
+  ret <8 x float> %2
+}