mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 20:32:21 +00:00
dab91bcc3a
This is a follow-on to r221706 and r221731 and is discussed in more detail in PR21385. This patch also loosens the testcase checking for btver2. We know that the "1.0" will be loaded, but we can't tell exactly when, so replace the CHECK-NEXT specifiers with plain CHECKs. The CHECK-NEXT sequence relied on a quirk of post-RA-scheduling that may change independently of anything in these tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221819 91177308-0d34-0410-b5e6-96231b3b80d8
110 lines
2.6 KiB
LLVM
110 lines
2.6 KiB
LLVM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+use-recip-est,+avx -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE

; If the target's divss/divps instructions are substantially
; slower than rcpss/rcpps with a Newton-Raphson refinement,
; we should generate the estimate sequence.

; See PR21385 ( http://llvm.org/bugs/show_bug.cgi?id=21385 )
; for details about the accuracy, speed, and implementation
; differences of x86 reciprocal estimates.

; Scalar case: a fast-math 1.0 / x on a single float.
;
; Expected codegen for each RUN configuration at the top of this file:
;   * core2 run (default check prefix): a real divss is emitted; no
;     reciprocal-estimate instruction is expected.
;   * btver2 run: vrcpss followed by one mul/sub/mul/add group.
;   * recip-est run with two refinement steps: vrcpss followed by two
;     mul/sub/mul/add groups.
;
; In the estimate sequences only the final retq is pinned to the line directly
; after the last add; the other instructions are matched in order but not
; required to be adjacent.
define float @reciprocal_estimate(float %x) #0 {
  %div = fdiv fast float 1.0, %x
  ret float %div

; CHECK-LABEL: reciprocal_estimate:
; CHECK: movss
; CHECK-NEXT: divss
; CHECK-NEXT: movaps
; CHECK-NEXT: retq

; BTVER2-LABEL: reciprocal_estimate:
; BTVER2: vrcpss
; BTVER2: vmulss
; BTVER2: vsubss
; BTVER2: vmulss
; BTVER2: vaddss
; BTVER2-NEXT: retq

; REFINE-LABEL: reciprocal_estimate:
; REFINE: vrcpss
; REFINE: vmulss
; REFINE: vsubss
; REFINE: vmulss
; REFINE: vaddss
; REFINE: vmulss
; REFINE: vsubss
; REFINE: vmulss
; REFINE: vaddss
; REFINE-NEXT: retq
}

; 4 x float vector case: a fast-math divide of an all-1.0 vector by %x.
;
; Expected codegen for each RUN configuration at the top of this file:
;   * core2 run (default check prefix): a real divps is emitted; no
;     reciprocal-estimate instruction is expected.
;   * btver2 run: vrcpps followed by one mul/sub/mul/add group.
;   * recip-est run with two refinement steps: vrcpps followed by two
;     mul/sub/mul/add groups.
;
; In the estimate sequences only the final retq is pinned to the line directly
; after the last add; the other instructions are matched in order but not
; required to be adjacent.
define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
  ret <4 x float> %div

; CHECK-LABEL: reciprocal_estimate_v4f32:
; CHECK: movaps
; CHECK-NEXT: divps
; CHECK-NEXT: movaps
; CHECK-NEXT: retq

; BTVER2-LABEL: reciprocal_estimate_v4f32:
; BTVER2: vrcpps
; BTVER2: vmulps
; BTVER2: vsubps
; BTVER2: vmulps
; BTVER2: vaddps
; BTVER2-NEXT: retq

; REFINE-LABEL: reciprocal_estimate_v4f32:
; REFINE: vrcpps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE-NEXT: retq
}

; 8 x float vector case: a fast-math divide of an all-1.0 vector by %x.
;
; Expected codegen for each RUN configuration at the top of this file:
;   * core2 run (default check prefix): two movaps, two divps, two movaps and
;     then retq, i.e. the divide is expected twice (presumably once per
;     128-bit half of the vector -- confirm against the target's legal types).
;   * btver2 run: a single vrcpps followed by one mul/sub/mul/add group.
;   * recip-est run with two refinement steps: a single vrcpps followed by two
;     mul/sub/mul/add groups.
;
; In the estimate sequences only the final retq is pinned to the line directly
; after the last add; the other instructions are matched in order but not
; required to be adjacent.
define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
  ret <8 x float> %div

; CHECK-LABEL: reciprocal_estimate_v8f32:
; CHECK: movaps
; CHECK: movaps
; CHECK-NEXT: divps
; CHECK-NEXT: divps
; CHECK-NEXT: movaps
; CHECK-NEXT: movaps
; CHECK-NEXT: retq

; BTVER2-LABEL: reciprocal_estimate_v8f32:
; BTVER2: vrcpps
; BTVER2: vmulps
; BTVER2: vsubps
; BTVER2: vmulps
; BTVER2: vaddps
; BTVER2-NEXT: retq

; REFINE-LABEL: reciprocal_estimate_v8f32:
; REFINE: vrcpps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE-NEXT: retq
}

; Attribute group #0, referenced by every function in this file: sets the
; "unsafe-fp-math" string attribute to "true".
attributes #0 = { "unsafe-fp-math"="true" }