llvm-6502/test/CodeGen/X86/recip-fastmath.ll
Sanjay Patel dab91bcc3a Expose the number of Newton-Raphson iterations applied to the hardware's reciprocal estimate as a parameter (x86).
This is a follow-on to r221706 and r221731 and is discussed in more detail in PR21385.

This patch also loosens the testcase checking for btver2. We know that the "1.0" will be loaded, but
we can't tell exactly when, so replace the CHECK-NEXT specifiers with plain CHECKs. The CHECK-NEXT
sequence relied on a quirk of post-RA-scheduling that may change independently of anything in these tests.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221819 91177308-0d34-0410-b5e6-96231b3b80d8
2014-11-12 21:39:01 +00:00

110 lines
2.6 KiB
LLVM

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+use-recip-est,+avx -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
; If the target's divss/divps instructions are substantially
; slower than rcpss/rcpps with a Newton-Raphson refinement,
; we should generate the estimate sequence.
; See PR21385 ( http://llvm.org/bugs/show_bug.cgi?id=21385 )
; for details about the accuracy, speed, and implementation
; differences of x86 reciprocal estimates.
; Scalar case: the reciprocal of one float, written as a 'fast' fdiv of 1.0 by %x.
; Expectations, per llc invocation at the top of this file:
;   -mcpu=core2                     a real divss; no estimate sequence.
;   -mcpu=btver2                    vrcpss, then one vmulss/vsubss/vmulss/vaddss group.
;   -x86-recip-refinement-steps=2   vrcpss, then two such groups back to back.
define float @reciprocal_estimate(float %x) #0 {
  %recip = fdiv fast float 1.000000e+00, %x
  ret float %recip

; core2 run (default prefix).
; CHECK-LABEL: reciprocal_estimate:
; CHECK: movss
; CHECK-NEXT: divss
; CHECK-NEXT: movaps
; CHECK-NEXT: retq

; btver2 run. Only the final vaddss -> retq adjacency is pinned; the other
; directives are order-only matches.
; BTVER2-LABEL: reciprocal_estimate:
; BTVER2: vrcpss
; BTVER2: vmulss
; BTVER2: vsubss
; BTVER2: vmulss
; BTVER2: vaddss
; BTVER2-NEXT: retq

; Two-refinement-step run.
; REFINE-LABEL: reciprocal_estimate:
; REFINE: vrcpss
; REFINE: vmulss
; REFINE: vsubss
; REFINE: vmulss
; REFINE: vaddss
; REFINE: vmulss
; REFINE: vsubss
; REFINE: vmulss
; REFINE: vaddss
; REFINE-NEXT: retq
}
; 4 x float case: a 'fast' fdiv of an all-ones vector by %x.
; Expectations, per llc invocation at the top of this file:
;   -mcpu=core2                     a real divps; no estimate sequence.
;   -mcpu=btver2                    vrcpps, then one vmulps/vsubps/vmulps/vaddps group.
;   -x86-recip-refinement-steps=2   vrcpps, then two such groups back to back.
define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
  %recip = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %x
  ret <4 x float> %recip

; core2 run (default prefix).
; CHECK-LABEL: reciprocal_estimate_v4f32:
; CHECK: movaps
; CHECK-NEXT: divps
; CHECK-NEXT: movaps
; CHECK-NEXT: retq

; btver2 run. Only the final vaddps -> retq adjacency is pinned; the other
; directives are order-only matches.
; BTVER2-LABEL: reciprocal_estimate_v4f32:
; BTVER2: vrcpps
; BTVER2: vmulps
; BTVER2: vsubps
; BTVER2: vmulps
; BTVER2: vaddps
; BTVER2-NEXT: retq

; Two-refinement-step run.
; REFINE-LABEL: reciprocal_estimate_v4f32:
; REFINE: vrcpps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE-NEXT: retq
}
; 8 x float case: a 'fast' fdiv of an all-ones vector by %x.
; Expectations, per llc invocation at the top of this file:
;   -mcpu=core2                     two real divps instructions (presumably one per
;                                   128-bit half of the 8-wide vector; confirm if it matters).
;   -mcpu=btver2                    vrcpps, then one vmulps/vsubps/vmulps/vaddps group.
;   -x86-recip-refinement-steps=2   vrcpps, then two such groups back to back.
define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
  %recip = fdiv fast <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %x
  ret <8 x float> %recip

; core2 run (default prefix).
; CHECK-LABEL: reciprocal_estimate_v8f32:
; CHECK: movaps
; CHECK: movaps
; CHECK-NEXT: divps
; CHECK-NEXT: divps
; CHECK-NEXT: movaps
; CHECK-NEXT: movaps
; CHECK-NEXT: retq

; btver2 run. Only the final vaddps -> retq adjacency is pinned; the other
; directives are order-only matches.
; BTVER2-LABEL: reciprocal_estimate_v8f32:
; BTVER2: vrcpps
; BTVER2: vmulps
; BTVER2: vsubps
; BTVER2: vmulps
; BTVER2: vaddps
; BTVER2-NEXT: retq

; Two-refinement-step run.
; REFINE-LABEL: reciprocal_estimate_v8f32:
; REFINE: vrcpps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE-NEXT: retq
}
; Attribute group #0, referenced by the function definitions in this file.
; It sets the string attribute "unsafe-fp-math" to "true".
attributes #0 = { "unsafe-fp-math"="true" }