Use sse_load_f32/64 for scalar FMA3 intrinsic patterns instead of 128-bit loads to match instruction behavior.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157895 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper
2012-06-03 01:40:43 +00:00
parent 60dda38008
commit 57ae246a6a
2 changed files with 32 additions and 38 deletions

View File

@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 -mattr=avx2,+fma3 | FileCheck %s
define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK: fmadd132ss {{.*\(%r.*}}, %xmm
; CHECK: fmadd132ss %xmm
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
ret <4 x float> %res
}
@@ -22,7 +22,7 @@ define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x f
declare <8 x float> @llvm.x86.fma4.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK: fnmadd132ss {{.*\(%r.*}}, %xmm
; CHECK: fnmadd132ss %xmm
%res = call <4 x float> @llvm.x86.fma4.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
ret <4 x float> %res
}