mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-11-04 05:17:07 +00:00 
			
		
		
		
	[X86] When pattern-matching scalar FMA3 intrinsics, don't re-arrange the first and second operands.
The semantics of the scalar FMA intrinsics are that the high vector elements are copied from the first source operand. The existing pattern switches src1 and src2 around to match the "213" order, which ends up tying the original src2 to the dest. Since the actual scalar FMA3 instructions copy the high elements from the dest register, the wrong values (those of src2 rather than src1) are copied. This modifies the pattern to leave src1 and src2 in their original order.

Differential Revision: http://reviews.llvm.org/D9908

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238131 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		@@ -183,19 +183,24 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
 | 
				
			|||||||
  defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "PD", IntF64, OpNode,
 | 
					  defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "PD", IntF64, OpNode,
 | 
				
			||||||
                        FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W;
 | 
					                        FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// These patterns use the 123 ordering, instead of 213, even though
 | 
				
			||||||
 | 
					// they match the intrinsic to the 213 version of the instruction.
 | 
				
			||||||
 | 
					// This is because src1 is tied to dest, and the scalar intrinsics
 | 
				
			||||||
 | 
					// require the pass-through values to come from the first source
 | 
				
			||||||
 | 
					// operand, not the second.
 | 
				
			||||||
  def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
 | 
					  def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
 | 
				
			||||||
            (COPY_TO_REGCLASS
 | 
					            (COPY_TO_REGCLASS
 | 
				
			||||||
              (!cast<Instruction>(NAME#"SSr213r")
 | 
					              (!cast<Instruction>(NAME#"SSr213r")
 | 
				
			||||||
                (COPY_TO_REGCLASS $src2, FR32),
 | 
					 | 
				
			||||||
                (COPY_TO_REGCLASS $src1, FR32),
 | 
					                (COPY_TO_REGCLASS $src1, FR32),
 | 
				
			||||||
 | 
					                (COPY_TO_REGCLASS $src2, FR32),
 | 
				
			||||||
                (COPY_TO_REGCLASS $src3, FR32)),
 | 
					                (COPY_TO_REGCLASS $src3, FR32)),
 | 
				
			||||||
              VR128)>;
 | 
					              VR128)>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
 | 
					  def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
 | 
				
			||||||
            (COPY_TO_REGCLASS
 | 
					            (COPY_TO_REGCLASS
 | 
				
			||||||
              (!cast<Instruction>(NAME#"SDr213r")
 | 
					              (!cast<Instruction>(NAME#"SDr213r")
 | 
				
			||||||
                (COPY_TO_REGCLASS $src2, FR64),
 | 
					 | 
				
			||||||
                (COPY_TO_REGCLASS $src1, FR64),
 | 
					                (COPY_TO_REGCLASS $src1, FR64),
 | 
				
			||||||
 | 
					                (COPY_TO_REGCLASS $src2, FR64),
 | 
				
			||||||
                (COPY_TO_REGCLASS $src3, FR64)),
 | 
					                (COPY_TO_REGCLASS $src3, FR64)),
 | 
				
			||||||
              VR128)>;
 | 
					              VR128)>;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3,7 +3,9 @@
 | 
				
			|||||||
; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s
 | 
					; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 | 
					define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 | 
				
			||||||
  ; CHECK: fmadd213ss (%r8), %xmm
 | 
					  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK: fmadd213ss (%r8), [[XMM1]], [[XMM0]]
 | 
				
			||||||
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
 | 
					  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
 | 
				
			||||||
  ret <4 x float> %res
 | 
					  ret <4 x float> %res
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -24,7 +26,9 @@ define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x f
 | 
				
			|||||||
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
 | 
					declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 | 
					define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 | 
				
			||||||
  ; CHECK: fnmadd213ss (%r8), %xmm
 | 
					  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK: fnmadd213ss (%r8), [[XMM1]], [[XMM0]]
 | 
				
			||||||
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
 | 
					  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
 | 
				
			||||||
  ret <4 x float> %res
 | 
					  ret <4 x float> %res
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -46,7 +50,9 @@ declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define <4 x float> @test_x86_fmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 | 
					define <4 x float> @test_x86_fmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 | 
				
			||||||
  ; CHECK: fmsub213ss
 | 
					  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK: fmsub213ss (%r8), [[XMM1]], [[XMM0]]
 | 
				
			||||||
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
 | 
					  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
 | 
				
			||||||
  ret <4 x float> %res
 | 
					  ret <4 x float> %res
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -60,7 +66,9 @@ define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x flo
 | 
				
			|||||||
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 | 
					declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define <4 x float> @test_x86_fnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 | 
					define <4 x float> @test_x86_fnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 | 
				
			||||||
  ; CHECK: fnmsub213ss
 | 
					  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK: fnmsub213ss (%r8), [[XMM1]], [[XMM0]]
 | 
				
			||||||
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
 | 
					  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
 | 
				
			||||||
  ret <4 x float> %res
 | 
					  ret <4 x float> %res
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -76,7 +84,9 @@ declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x floa
 | 
				
			|||||||
;;;;
 | 
					;;;;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define <2 x double> @test_x86_fmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
 | 
					define <2 x double> @test_x86_fmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
 | 
				
			||||||
  ; CHECK: fmadd213sd
 | 
					  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK: fmadd213sd (%r8), [[XMM1]], [[XMM0]]
 | 
				
			||||||
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
 | 
					  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
 | 
				
			||||||
  ret <2 x double> %res
 | 
					  ret <2 x double> %res
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -90,7 +100,9 @@ define <2 x double> @test_x86_fmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x
 | 
				
			|||||||
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 | 
					declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define <2 x double> @test_x86_fnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
 | 
					define <2 x double> @test_x86_fnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
 | 
				
			||||||
  ; CHECK: fnmadd213sd
 | 
					  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK: fnmadd213sd (%r8), [[XMM1]], [[XMM0]]
 | 
				
			||||||
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
 | 
					  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
 | 
				
			||||||
  ret <2 x double> %res
 | 
					  ret <2 x double> %res
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -106,7 +118,9 @@ declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x d
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define <2 x double> @test_x86_fmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
 | 
					define <2 x double> @test_x86_fmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
 | 
				
			||||||
  ; CHECK: fmsub213sd
 | 
					  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK: fmsub213sd (%r8), [[XMM1]], [[XMM0]]
 | 
				
			||||||
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
 | 
					  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
 | 
				
			||||||
  ret <2 x double> %res
 | 
					  ret <2 x double> %res
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -120,7 +134,9 @@ define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x
 | 
				
			|||||||
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 | 
					declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define <2 x double> @test_x86_fnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
 | 
					define <2 x double> @test_x86_fnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
 | 
				
			||||||
  ; CHECK: fnmsub213sd
 | 
					  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
 | 
				
			||||||
 | 
					  ; CHECK: fnmsub213sd (%r8), [[XMM1]], [[XMM0]]
 | 
				
			||||||
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
 | 
					  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
 | 
				
			||||||
  ret <2 x double> %res
 | 
					  ret <2 x double> %res
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user