mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-11-02 22:23:10 +00:00
D8982 (checked in at http://reviews.llvm.org/rL239001) added command-line options to allow reciprocal estimate instructions to be used in place of divisions and square roots. This patch changes the default settings for x86 targets to allow that reciprocal-estimate codegen (except for scalar division, because that breaks too much code) when using -ffast-math or its equivalent. This matches GCC's behavior for this kind of codegen. Differential Revision: http://reviews.llvm.org/D10396 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240310 91177308-0d34-0410-b5e6-96231b3b80d8
142 lines
4.8 KiB
LLVM
142 lines
4.8 KiB
LLVM
; Two llc configurations are exercised; each one pipes its assembly output to
; FileCheck under its own check prefix.
;
; Configuration 1 - SSE2 target, with every listed -recip entry prefixed by '!'.
; The expectations under the NORECIP prefix contain only real sqrt/div
; instructions, never an estimate instruction.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!sqrtf,!vec-sqrtf,!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP
|
|
; Configuration 2 - AVX target, with -recip listing sqrtf and vec-sqrtf (the
; single-precision scalar and vector square-root entries). The expectations
; under the ESTIMATE prefix use vrsqrtss/vrsqrtps for the float tests.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=sqrtf,vec-sqrtf | FileCheck %s --check-prefix=ESTIMATE
|
|
|
|
; External square-root entry points used by the tests below. All of them carry
; attribute group #0.
;
; "__sqrt*_finite" functions for double, float and x86_fp80; called by
; @fd, @ff and @fld respectively.
declare double @__sqrt_finite(double) #0
|
|
declare float @__sqrtf_finite(float) #0
|
|
declare x86_fp80 @__sqrtl_finite(x86_fp80) #0
|
|
; llvm.sqrt intrinsics for scalar float, <4 x float> and <8 x float>; called by
; the reciprocal_square_root* tests.
declare float @llvm.sqrt.f32(float) #0
|
|
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
|
|
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
|
|
|
|
|
|
; fd - double-precision square root through a call to @__sqrt_finite.
;
; Both configurations expect the call to become a single hardware square-root
; instruction followed by the return: sqrtsd for the SSE2 configuration and
; vsqrtsd for the AVX configuration. No estimate instruction is expected for
; double in either configuration (the AVX command line only lists the sqrtf
; and vec-sqrtf entries).
define double @fd(double %d) #0 {
|
|
; NORECIP-LABEL: fd:
|
|
; NORECIP: # BB#0:
|
|
; NORECIP-NEXT: sqrtsd %xmm0, %xmm0
|
|
; NORECIP-NEXT: retq
|
|
;
|
|
; ESTIMATE-LABEL: fd:
|
|
; ESTIMATE: # BB#0:
|
|
; ESTIMATE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
|
|
; ESTIMATE-NEXT: retq
|
|
; Call site uses attribute group #1 (nounwind readnone).
%call = tail call double @__sqrt_finite(double %d) #1
|
|
ret double %call
|
|
}
|
|
|
|
|
|
; ff - single-precision square root through a call to @__sqrtf_finite.
;
; SSE2 configuration - a single sqrtss, then return.
;
; AVX configuration - no sqrt instruction at all. The expected sequence is
;   1. vrsqrtss produces a reciprocal-square-root estimate in xmm1;
;   2. a multiply/add refinement that uses two constants loaded RIP-relative
;      (matched by wildcard, so their values are not visible in this file;
;      NOTE(review) - this looks like one Newton-Raphson step, confirm);
;   3. the refined value is multiplied by the input (x * rsqrt(x));
;   4. vxorps/vcmpeqss/vandnps build a mask of "input == 0.0" and clear the
;      result where it is set, so a zero input returns 0.0 (presumably to
;      avoid 0 * inf producing NaN - confirm).
define float @ff(float %f) #0 {
|
|
; NORECIP-LABEL: ff:
|
|
; NORECIP: # BB#0:
|
|
; NORECIP-NEXT: sqrtss %xmm0, %xmm0
|
|
; NORECIP-NEXT: retq
|
|
;
|
|
; ESTIMATE-LABEL: ff:
|
|
; ESTIMATE: # BB#0:
|
|
; ESTIMATE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
|
|
; ESTIMATE-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm2
|
|
; ESTIMATE-NEXT: vmulss %xmm1, %xmm1, %xmm1
|
|
; ESTIMATE-NEXT: vmulss %xmm0, %xmm1, %xmm1
|
|
; ESTIMATE-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1
|
|
; ESTIMATE-NEXT: vmulss %xmm2, %xmm1, %xmm1
|
|
; ESTIMATE-NEXT: vmulss %xmm1, %xmm0, %xmm1
|
|
; ESTIMATE-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
|
; ESTIMATE-NEXT: vcmpeqss %xmm2, %xmm0, %xmm0
|
|
; ESTIMATE-NEXT: vandnps %xmm1, %xmm0, %xmm0
|
|
; ESTIMATE-NEXT: retq
|
|
; Call site uses attribute group #1 (nounwind readnone).
%call = tail call float @__sqrtf_finite(float %f) #1
|
|
ret float %call
|
|
}
|
|
|
|
|
|
; fld - x86_fp80 square root through a call to @__sqrtl_finite.
;
; Both configurations expect the same x87 sequence: fldt loads the argument
; from the stack, fsqrt computes the root, then the function returns. No
; estimate instruction is expected in either configuration.
define x86_fp80 @fld(x86_fp80 %ld) #0 {
|
|
; NORECIP-LABEL: fld:
|
|
; NORECIP: # BB#0:
|
|
; NORECIP-NEXT: fldt {{[0-9]+}}(%rsp)
|
|
; NORECIP-NEXT: fsqrt
|
|
; NORECIP-NEXT: retq
|
|
;
|
|
; ESTIMATE-LABEL: fld:
|
|
; ESTIMATE: # BB#0:
|
|
; ESTIMATE-NEXT: fldt {{[0-9]+}}(%rsp)
|
|
; ESTIMATE-NEXT: fsqrt
|
|
; ESTIMATE-NEXT: retq
|
|
; Call site uses attribute group #1 (nounwind readnone).
%call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #1
|
|
ret x86_fp80 %call
|
|
}
|
|
|
|
|
|
|
|
; reciprocal_square_root - scalar float 1.0 / sqrt(x), written as a call to
; @llvm.sqrt.f32 followed by an fdiv carrying the 'fast' flag.
;
; SSE2 configuration - sqrtss, a movss that loads a constant from memory into
; xmm0 (the dividend), then divss.
;
; AVX configuration - neither sqrt nor div is expected. vrsqrtss produces the
; estimate, followed by a multiply/add refinement using two RIP-relative
; constants (matched by wildcard; NOTE(review) - looks like one Newton-Raphson
; step, confirm). Unlike @ff there is no multiply by the input and no
; zero-input masking before the return.
define float @reciprocal_square_root(float %x) #0 {
|
|
; NORECIP-LABEL: reciprocal_square_root:
|
|
; NORECIP: # BB#0:
|
|
; NORECIP-NEXT: sqrtss %xmm0, %xmm1
|
|
; NORECIP-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
; NORECIP-NEXT: divss %xmm1, %xmm0
|
|
; NORECIP-NEXT: retq
|
|
;
|
|
; ESTIMATE-LABEL: reciprocal_square_root:
|
|
; ESTIMATE: # BB#0:
|
|
; ESTIMATE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
|
|
; ESTIMATE-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm2
|
|
; ESTIMATE-NEXT: vmulss %xmm1, %xmm1, %xmm1
|
|
; ESTIMATE-NEXT: vmulss %xmm0, %xmm1, %xmm0
|
|
; ESTIMATE-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
|
|
; ESTIMATE-NEXT: vmulss %xmm2, %xmm0, %xmm0
|
|
; ESTIMATE-NEXT: retq
|
|
; The sqrt call itself carries no fast-math flag; only the fdiv is 'fast'.
%sqrt = tail call float @llvm.sqrt.f32(float %x)
|
|
%div = fdiv fast float 1.0, %sqrt
|
|
ret float %div
|
|
}
|
|
|
|
; reciprocal_square_root_v4f32 - <4 x float> version of 1.0 / sqrt(x): a call
; to @llvm.sqrt.v4f32 followed by a 'fast' fdiv of an all-ones splat.
;
; SSE2 configuration - sqrtps, a movaps that loads the <1.0 x 4> constant, then
; divps.
;
; AVX configuration - neither sqrt nor div is expected. vrsqrtps on an xmm
; register produces the estimate, followed by a multiply/add refinement using
; two RIP-relative constants (matched by wildcard; NOTE(review) - looks like
; one Newton-Raphson step, confirm).
define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 {
|
|
; NORECIP-LABEL: reciprocal_square_root_v4f32:
|
|
; NORECIP: # BB#0:
|
|
; NORECIP-NEXT: sqrtps %xmm0, %xmm1
|
|
; NORECIP-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
|
; NORECIP-NEXT: divps %xmm1, %xmm0
|
|
; NORECIP-NEXT: retq
|
|
;
|
|
; ESTIMATE-LABEL: reciprocal_square_root_v4f32:
|
|
; ESTIMATE: # BB#0:
|
|
; ESTIMATE-NEXT: vrsqrtps %xmm0, %xmm1
|
|
; ESTIMATE-NEXT: vmulps %xmm1, %xmm1, %xmm2
|
|
; ESTIMATE-NEXT: vmulps %xmm0, %xmm2, %xmm0
|
|
; ESTIMATE-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
|
; ESTIMATE-NEXT: vmulps {{.*}}(%rip), %xmm1, %xmm1
|
|
; ESTIMATE-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
|
; ESTIMATE-NEXT: retq
|
|
; The sqrt call itself carries no fast-math flag; only the fdiv is 'fast'.
%sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
|
|
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
|
|
ret <4 x float> %div
|
|
}
|
|
|
|
; reciprocal_square_root_v8f32 - <8 x float> version of 1.0 / sqrt(x): a call
; to @llvm.sqrt.v8f32 followed by a 'fast' fdiv of an all-ones splat.
;
; SSE2 configuration - the 8-element vector is processed as two xmm halves:
; two sqrtps, one movaps loading the <1.0 x 4> constant (copied to a second
; register with a register-to-register movaps), then two divps.
;
; AVX configuration - a single ymm-wide sequence with neither sqrt nor div:
; vrsqrtps produces the estimate, followed by a multiply/add refinement using
; two RIP-relative constants (matched by wildcard; NOTE(review) - looks like
; one Newton-Raphson step, confirm).
define <8 x float> @reciprocal_square_root_v8f32(<8 x float> %x) #0 {
|
|
; NORECIP-LABEL: reciprocal_square_root_v8f32:
|
|
; NORECIP: # BB#0:
|
|
; NORECIP-NEXT: sqrtps %xmm1, %xmm2
|
|
; NORECIP-NEXT: sqrtps %xmm0, %xmm3
|
|
; NORECIP-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
|
; NORECIP-NEXT: movaps %xmm1, %xmm0
|
|
; NORECIP-NEXT: divps %xmm3, %xmm0
|
|
; NORECIP-NEXT: divps %xmm2, %xmm1
|
|
; NORECIP-NEXT: retq
|
|
;
|
|
; ESTIMATE-LABEL: reciprocal_square_root_v8f32:
|
|
; ESTIMATE: # BB#0:
|
|
; ESTIMATE-NEXT: vrsqrtps %ymm0, %ymm1
|
|
; ESTIMATE-NEXT: vmulps %ymm1, %ymm1, %ymm2
|
|
; ESTIMATE-NEXT: vmulps %ymm0, %ymm2, %ymm0
|
|
; ESTIMATE-NEXT: vaddps {{.*}}(%rip), %ymm0, %ymm0
|
|
; ESTIMATE-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
|
|
; ESTIMATE-NEXT: vmulps %ymm1, %ymm0, %ymm0
|
|
; ESTIMATE-NEXT: retq
|
|
; The sqrt call itself carries no fast-math flag; only the fdiv is 'fast'.
%sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
|
|
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
|
|
ret <8 x float> %div
|
|
}
|
|
|
|
|
|
; Attribute group #0 is attached to every function definition and declaration
; in this file; it sets the "unsafe-fp-math" string attribute to "true".
attributes #0 = { "unsafe-fp-math"="true" }
|
|
; Attribute group #1 is used only at the call sites of the __sqrt*_finite
; functions (in @fd, @ff and @fld).
attributes #1 = { nounwind readnone }
|
|
|