llvm-6502/test/CodeGen/X86/recip-fastmath.ll
Sanjay Patel e4e5cf5a66 make reciprocal estimate code generation more flexible by adding command-line options (3rd try)
The first try (r238051) to land this was reverted due to ExecutionEngine build failure;
that was hopefully addressed by r238788.

The second try (r238842) to land this was reverted due to BUILD_SHARED_LIBS failure;
that was hopefully addressed by r238953.

This patch adds a TargetRecip class for processing many recip codegen possibilities.
The class is intended to handle both command-line options to llc as well
as options passed in from a front-end such as clang with the -mrecip option.

The x86 backend is updated to use the new functionality.
Only -mcpu=btver2 with -ffast-math should see a functional change from this patch.
All other x86 CPUs continue to *not* use reciprocal estimates by default with -ffast-math.

Differential Revision: http://reviews.llvm.org/D8982



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@239001 91177308-0d34-0410-b5e6-96231b3b80d8
2015-06-04 01:32:35 +00:00

110 lines
2.6 KiB
LLVM

; Test matrix. The three llc invocations below compile this file as:
;   1. sse2, no -recip option          -> verified with the default CHECK prefix
;   2. avx, -recip=divf,vec-divf       -> verified with the RECIP prefix
;   3. avx, -recip=divf:2,vec-divf:2   -> verified with the REFINE prefix
; In the expectations further down, invocation 1 expects real divide
; instructions, invocation 2 expects a reciprocal estimate followed by one
; multiply/subtract/multiply/add group, and invocation 3 expects the estimate
; followed by two such groups.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf,vec-divf | FileCheck %s --check-prefix=RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf:2,vec-divf:2 | FileCheck %s --check-prefix=REFINE
; If the target's divss/divps instructions are substantially
; slower than rcpss/rcpps with a Newton-Raphson refinement,
; we should generate the estimate sequence.
; See PR21385 ( http://llvm.org/bugs/show_bug.cgi?id=21385 )
; for details about the accuracy, speed, and implementation
; differences of x86 reciprocal estimates.
; Scalar case: divides the float constant 1.0 by %x, with the 'fast' flag on
; the fdiv and attribute group #0 on the function. One set of expectations
; follows for each FileCheck prefix used by the llc invocations at the top of
; the file.
define float @reciprocal_estimate(float %x) #0 {
%div = fdiv fast float 1.0, %x
ret float %div
; Default prefix (sse2 invocation): a movss, then divss, movaps and retq on
; consecutive output lines, i.e. a real divide.
; CHECK-LABEL: reciprocal_estimate:
; CHECK: movss
; CHECK-NEXT: divss
; CHECK-NEXT: movaps
; CHECK-NEXT: retq
; Estimate prefix (avx, -recip=divf,vec-divf): vrcpss followed, in order, by
; one multiply/subtract/multiply/add group; only the final retq must be on the
; line directly after the preceding match.
; RECIP-LABEL: reciprocal_estimate:
; RECIP: vrcpss
; RECIP: vmulss
; RECIP: vsubss
; RECIP: vmulss
; RECIP: vaddss
; RECIP-NEXT: retq
; Refinement prefix (avx, -recip=divf:2,vec-divf:2): vrcpss followed, in
; order, by two multiply/subtract/multiply/add groups.
; REFINE-LABEL: reciprocal_estimate:
; REFINE: vrcpss
; REFINE: vmulss
; REFINE: vsubss
; REFINE: vmulss
; REFINE: vaddss
; REFINE: vmulss
; REFINE: vsubss
; REFINE: vmulss
; REFINE: vaddss
; REFINE-NEXT: retq
}
; 4 x float case: divides a vector of four 1.0 constants by %x, with the
; 'fast' flag on the fdiv and attribute group #0 on the function. One set of
; expectations follows for each FileCheck prefix used by the llc invocations
; at the top of the file.
define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
; Default prefix (sse2 invocation): a movaps, then divps, movaps and retq on
; consecutive output lines, i.e. a real packed divide.
; CHECK-LABEL: reciprocal_estimate_v4f32:
; CHECK: movaps
; CHECK-NEXT: divps
; CHECK-NEXT: movaps
; CHECK-NEXT: retq
; Estimate prefix (avx, -recip=divf,vec-divf): vrcpps followed, in order, by
; one multiply/subtract/multiply/add group.
; RECIP-LABEL: reciprocal_estimate_v4f32:
; RECIP: vrcpps
; RECIP: vmulps
; RECIP: vsubps
; RECIP: vmulps
; RECIP: vaddps
; RECIP-NEXT: retq
; Refinement prefix (avx, -recip=divf:2,vec-divf:2): vrcpps followed, in
; order, by two multiply/subtract/multiply/add groups.
; REFINE-LABEL: reciprocal_estimate_v4f32:
; REFINE: vrcpps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE-NEXT: retq
}
; 8 x float case: divides a vector of eight 1.0 constants by %x, with the
; 'fast' flag on the fdiv and attribute group #0 on the function. One set of
; expectations follows for each FileCheck prefix used by the llc invocations
; at the top of the file.
define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
; Default prefix (sse2 invocation): the expected output contains two divps
; instructions, bracketed by two movaps before and two movaps after, then retq.
; CHECK-LABEL: reciprocal_estimate_v8f32:
; CHECK: movaps
; CHECK: movaps
; CHECK-NEXT: divps
; CHECK-NEXT: divps
; CHECK-NEXT: movaps
; CHECK-NEXT: movaps
; CHECK-NEXT: retq
; Estimate prefix (avx, -recip=divf,vec-divf): a single vrcpps followed, in
; order, by one multiply/subtract/multiply/add group.
; RECIP-LABEL: reciprocal_estimate_v8f32:
; RECIP: vrcpps
; RECIP: vmulps
; RECIP: vsubps
; RECIP: vmulps
; RECIP: vaddps
; RECIP-NEXT: retq
; Refinement prefix (avx, -recip=divf:2,vec-divf:2): a single vrcpps followed,
; in order, by two multiply/subtract/multiply/add groups.
; REFINE-LABEL: reciprocal_estimate_v8f32:
; REFINE: vrcpps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE-NEXT: retq
}
; Attribute group #0, referenced by each function defined above: sets the
; "unsafe-fp-math" function attribute to "true".
attributes #0 = { "unsafe-fp-math"="true" }