llvm-6502/test/CodeGen/PowerPC/recipest.ll
Sanjay Patel a4554c2897 Improve sqrt estimate algorithm (fast-math)
This patch changes the fast-math implementation for calculating sqrt(x) from:
y = 1 / (1 / sqrt(x))
to:
y = x * (1 / sqrt(x))

This has 2 benefits: less code / faster code and one less estimate instruction 
that may lose precision.

The only target that will be affected (until http://reviews.llvm.org/D5658 is approved)
is PPC. The difference in codegen for PPC is 2 less flops for a single-precision sqrtf
or vector sqrtf and 4 less flops for a double-precision sqrt. 
We also eliminate a constant load and extra register usage.

Differential Revision: http://reviews.llvm.org/D5682



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219445 91177308-0d34-0410-b5e6-96231b3b80d8
2014-10-09 21:26:35 +00:00

240 lines
4.7 KiB
LLVM

; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck -check-prefix=CHECK-SAFE %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
declare double @llvm.sqrt.f64(double)
declare float @llvm.sqrt.f32(float)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
define double @foo(double %a, double %b) nounwind {
%x = call double @llvm.sqrt.f64(double %b)
%r = fdiv double %a, %x
ret double %r
; CHECK: @foo
; CHECK-DAG: frsqrte
; CHECK-DAG: fnmsub
; CHECK: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK: blr
; CHECK-SAFE: @foo
; CHECK-SAFE: fsqrt
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}
define double @foof(double %a, float %b) nounwind {
%x = call float @llvm.sqrt.f32(float %b)
%y = fpext float %x to double
%r = fdiv double %a, %y
ret double %r
; CHECK: @foof
; CHECK-DAG: frsqrtes
; CHECK-DAG: fnmsubs
; CHECK: fmuls
; CHECK-NEXT: fmadds
; CHECK-NEXT: fmuls
; CHECK-NEXT: fmul
; CHECK-NEXT: blr
; CHECK-SAFE: @foof
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}
define float @food(float %a, double %b) nounwind {
%x = call double @llvm.sqrt.f64(double %b)
%y = fptrunc double %x to float
%r = fdiv float %a, %y
ret float %r
; CHECK: @foo
; CHECK-DAG: frsqrte
; CHECK-DAG: fnmsub
; CHECK: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: frsp
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr
; CHECK-SAFE: @foo
; CHECK-SAFE: fsqrt
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}
define float @goo(float %a, float %b) nounwind {
%x = call float @llvm.sqrt.f32(float %b)
%r = fdiv float %a, %x
ret float %r
; CHECK: @goo
; CHECK-DAG: frsqrtes
; CHECK-DAG: fnmsubs
; CHECK: fmuls
; CHECK-NEXT: fmadds
; CHECK-NEXT: fmuls
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr
; CHECK-SAFE: @goo
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}
; Recognize that this is rsqrt(a) * rcp(b) * c,
; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
define float @rsqrt_fmul(float %a, float %b, float %c) {
%x = call float @llvm.sqrt.f32(float %a)
%y = fmul float %x, %b
%z = fdiv float %c, %y
ret float %z
; CHECK: @rsqrt_fmul
; CHECK-DAG: frsqrtes
; CHECK-DAG: fres
; CHECK-DAG: fnmsubs
; CHECK-DAG: fmuls
; CHECK-DAG: fnmsubs
; CHECK-DAG: fmadds
; CHECK-DAG: fmadds
; CHECK: fmuls
; CHECK-NEXT: fmuls
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr
; CHECK-SAFE: @rsqrt_fmul
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fmuls
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}
define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
%r = fdiv <4 x float> %a, %x
ret <4 x float> %r
; CHECK: @hoo
; CHECK: vrsqrtefp
; CHECK-SAFE: @hoo
; CHECK-SAFE-NOT: vrsqrtefp
; CHECK-SAFE: blr
}
define double @foo2(double %a, double %b) nounwind {
%r = fdiv double %a, %b
ret double %r
; CHECK: @foo2
; CHECK-DAG: fre
; CHECK-DAG: fnmsub
; CHECK: fmadd
; CHECK-NEXT: fnmsub
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: blr
; CHECK-SAFE: @foo2
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}
define float @goo2(float %a, float %b) nounwind {
%r = fdiv float %a, %b
ret float %r
; CHECK: @goo2
; CHECK-DAG: fres
; CHECK-DAG: fnmsubs
; CHECK: fmadds
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr
; CHECK-SAFE: @goo2
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}
define <4 x float> @hoo2(<4 x float> %a, <4 x float> %b) nounwind {
%r = fdiv <4 x float> %a, %b
ret <4 x float> %r
; CHECK: @hoo2
; CHECK: vrefp
; CHECK-SAFE: @hoo2
; CHECK-SAFE-NOT: vrefp
; CHECK-SAFE: blr
}
define double @foo3(double %a) nounwind {
%r = call double @llvm.sqrt.f64(double %a)
ret double %r
; CHECK: @foo3
; CHECK: fcmpu
; CHECK-DAG: frsqrte
; CHECK-DAG: fnmsub
; CHECK: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK: blr
; CHECK-SAFE: @foo3
; CHECK-SAFE: fsqrt
; CHECK-SAFE: blr
}
define float @goo3(float %a) nounwind {
%r = call float @llvm.sqrt.f32(float %a)
ret float %r
; CHECK: @goo3
; CHECK: fcmpu
; CHECK-DAG: frsqrtes
; CHECK-DAG: fnmsubs
; CHECK: fmuls
; CHECK-NEXT: fmadds
; CHECK-NEXT: fmuls
; CHECK-NEXT: fmuls
; CHECK: blr
; CHECK-SAFE: @goo3
; CHECK-SAFE: fsqrts
; CHECK-SAFE: blr
}
define <4 x float> @hoo3(<4 x float> %a) nounwind {
%r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
ret <4 x float> %r
; CHECK: @hoo3
; CHECK: vrsqrtefp
; CHECK-DAG: vcmpeqfp
; CHECK-SAFE: @hoo3
; CHECK-SAFE-NOT: vrsqrtefp
; CHECK-SAFE: blr
}