Use sse_load_f32/64 for scalar FMA3 intrinsic patterns instead of 128-bit loads to match instruction behavior.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157895 91177308-0d34-0410-b5e6-96231b3b80d8
2025-08-05 13:26:55 +00:00 · 2012-06-03 01:40:43 +00:00
parent 60dda38008
commit 57ae246a6a
2 changed files with 32 additions and 38 deletions
--- a/test/CodeGen/X86/fma3-intrinsics.ll
+++ b/test/CodeGen/X86/fma3-intrinsics.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 -mattr=avx2,+fma3 | FileCheck %s

 define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK: fmadd132ss {{.*\(%r.*}}, %xmm
+  ; CHECK: fmadd132ss %xmm
  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
  ret <4 x float> %res
 }
@@ -22,7 +22,7 @@ define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x f
 declare <8 x float> @llvm.x86.fma4.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

 define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK: fnmadd132ss {{.*\(%r.*}}, %xmm
+  ; CHECK: fnmadd132ss %xmm
  %res = call <4 x float> @llvm.x86.fma4.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
  ret <4 x float> %res
 }