2014-10-21 13:02:37 +00:00
|
|
|
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx | FileCheck %s
|
|
|
|
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck -check-prefix=CHECK-SAFE %s
|
2013-04-03 04:01:11 +00:00
|
|
|
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
|
|
|
target triple = "powerpc64-unknown-linux-gnu"
|
|
|
|
|
|
|
|
declare double @llvm.sqrt.f64(double)
|
|
|
|
declare float @llvm.sqrt.f32(float)
|
|
|
|
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
|
|
|
|
|
|
|
|
define double @foo(double %a, double %b) nounwind {
  ; Computes a / sqrt(b) entirely in double precision.
  %root = call double @llvm.sqrt.f64(double %b)
  %quot = fdiv double %a, %root
  ret double %quot

; With -enable-unsafe-fp-math the output must contain the frsqrte estimate
; and an fnmsub (in either order), followed by one unbroken run of
; fmul/fmadd instructions before the return.
; CHECK: @foo
; CHECK-DAG: frsqrte
; CHECK-DAG: fnmsub
; CHECK: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK: blr

; Without the unsafe-math flag the real fsqrt and fdiv must be emitted.
; CHECK-SAFE: @foo
; CHECK-SAFE: fsqrt
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}
|
|
|
|
|
2013-04-04 22:44:12 +00:00
|
|
|
define double @foof(double %a, float %b) nounwind {
  ; Square root is taken in single precision, widened to double, and then
  ; used as the divisor of a double-precision division.
  %root = call float @llvm.sqrt.f32(float %b)
  %wide = fpext float %root to double
  %quot = fdiv double %a, %wide
  ret double %quot

; With -enable-unsafe-fp-math: single-precision estimate (frsqrtes) and
; fnmsubs in either order, then consecutive fmuls/fmadds/fmuls, a final
; double-precision fmul, and the return immediately after it.
; CHECK: @foof
; CHECK-DAG: frsqrtes
; CHECK-DAG: fnmsubs
; CHECK: fmuls
; CHECK-NEXT: fmadds
; CHECK-NEXT: fmuls
; CHECK-NEXT: fmul
; CHECK-NEXT: blr

; Without the unsafe-math flag: a real fsqrts followed by a real fdiv.
; CHECK-SAFE: @foof
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}
|
|
|
|
|
|
|
|
define float @food(float %a, double %b) nounwind {
  ; Square root is taken in double precision, narrowed to float, and then
  ; used as the divisor of a single-precision division.
  %x = call double @llvm.sqrt.f64(double %b)
  %y = fptrunc double %x to float
  %r = fdiv float %a, %y
  ret float %r

; The label checks below name @food explicitly. They previously said @foo,
; which only matched this function by substring and did not pin the
; function actually under test.
;
; With -enable-unsafe-fp-math: double-precision estimate (frsqrte) and
; fnmsub in either order, a consecutive fmul/fmadd run, then frsp to round
; to single precision, a single-precision fmuls, and the return.
; CHECK: @food
; CHECK-DAG: frsqrte
; CHECK-DAG: fnmsub
; CHECK: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: frsp
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr

; Without the unsafe-math flag: a real fsqrt followed by a real fdivs.
; CHECK-SAFE: @food
; CHECK-SAFE: fsqrt
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}
|
|
|
|
|
2013-04-03 04:01:11 +00:00
|
|
|
define float @goo(float %a, float %b) nounwind {
  ; Computes a / sqrt(b) entirely in single precision.
  %root = call float @llvm.sqrt.f32(float %b)
  %quot = fdiv float %a, %root
  ret float %quot

; With -enable-unsafe-fp-math: frsqrtes and fnmsubs in either order, then
; consecutive fmuls/fmadds/fmuls/fmuls ending directly in the return.
; CHECK: @goo
; CHECK-DAG: frsqrtes
; CHECK-DAG: fnmsubs
; CHECK: fmuls
; CHECK-NEXT: fmadds
; CHECK-NEXT: fmuls
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr

; Without the unsafe-math flag: a real fsqrts followed by a real fdivs.
; CHECK-SAFE: @goo
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}
|
|
|
|
|
Fast-math fold: x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
The motivation is to recognize code such as this from /llvm/projects/test-suite/SingleSource/Benchmarks/BenchmarkGame/n-body.c:
float distance = sqrt(dx * dx + dy * dy + dz * dz);
float mag = dt / (distance * distance * distance);
Without this patch, we don't match the sqrt as a reciprocal sqrt, so for PPC the new testcase in this patch produces:
addis 3, 2, .LCPI4_2@toc@ha
lfs 4, .LCPI4_2@toc@l(3)
addis 3, 2, .LCPI4_1@toc@ha
lfs 0, .LCPI4_1@toc@l(3)
fcmpu 0, 1, 4
beq 0, .LBB4_2
# BB#1:
frsqrtes 4, 1
addis 3, 2, .LCPI4_0@toc@ha
lfs 5, .LCPI4_0@toc@l(3)
fnmsubs 13, 1, 5, 1
fmuls 6, 4, 4
fmadds 1, 13, 6, 5
fmuls 1, 4, 1
fres 4, 1 <--- reciprocal of reciprocal square root
fnmsubs 1, 1, 4, 0
fmadds 4, 4, 1, 4
.LBB4_2:
fmuls 1, 4, 2
fres 2, 1
fnmsubs 0, 1, 2, 0
fmadds 0, 2, 0, 2
fmuls 1, 3, 0
blr
After the patch, this simplifies to:
frsqrtes 0, 1
addis 3, 2, .LCPI4_1@toc@ha
fres 5, 2
lfs 4, .LCPI4_1@toc@l(3)
addis 3, 2, .LCPI4_0@toc@ha
lfs 7, .LCPI4_0@toc@l(3)
fnmsubs 13, 1, 4, 1
fmuls 6, 0, 0
fnmsubs 2, 2, 5, 7
fmadds 1, 13, 6, 4
fmadds 2, 5, 2, 5
fmuls 0, 0, 1
fmuls 0, 0, 2
fmuls 1, 3, 0
blr
Differential Revision: http://reviews.llvm.org/D5628
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219139 91177308-0d34-0410-b5e6-96231b3b80d8
2014-10-06 19:31:18 +00:00
|
|
|
; Recognize that this is rsqrt(a) * rcp(b) * c,
|
|
|
|
; not 1 / (1 / sqrt(a)) * rcp(b) * c.
|
|
|
|
define float @rsqrt_fmul(float %a, float %b, float %c) {
  ; Computes c / (sqrt(a) * b) in single precision.
  %root = call float @llvm.sqrt.f32(float %a)
  %denom = fmul float %root, %b
  %quot = fdiv float %c, %denom
  ret float %quot

; With -enable-unsafe-fp-math: both estimate instructions (frsqrtes for the
; square root, fres for the reciprocal) and their fnmsubs/fmuls/fmadds
; companions may appear in any order, after which exactly three consecutive
; fmuls lead into the return.
; CHECK: @rsqrt_fmul
; CHECK-DAG: frsqrtes
; CHECK-DAG: fres
; CHECK-DAG: fnmsubs
; CHECK-DAG: fmuls
; CHECK-DAG: fnmsubs
; CHECK-DAG: fmadds
; CHECK-DAG: fmadds
; CHECK: fmuls
; CHECK-NEXT: fmuls
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr

; Without the unsafe-math flag: fsqrts, then fmuls, then fdivs.
; CHECK-SAFE: @rsqrt_fmul
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fmuls
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}
|
|
|
|
|
2013-04-03 04:01:11 +00:00
|
|
|
define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
  ; Element-wise a / sqrt(b) on a four-lane float vector.
  %root = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
  %quot = fdiv <4 x float> %a, %root
  ret <4 x float> %quot

; With -enable-unsafe-fp-math the vector estimate vrsqrtefp must be used.
; CHECK: @hoo
; CHECK: vrsqrtefp

; Without the unsafe-math flag vrsqrtefp must not appear before the return.
; CHECK-SAFE: @hoo
; CHECK-SAFE-NOT: vrsqrtefp
; CHECK-SAFE: blr
}
|
|
|
|
|
|
|
|
define double @foo2(double %a, double %b) nounwind {
  ; Plain double-precision division a / b, with no square root involved.
  %quot = fdiv double %a, %b
  ret double %quot

; With -enable-unsafe-fp-math: the reciprocal estimate fre and an fnmsub in
; either order, then consecutive fmadd/fnmsub/fmadd/fmul ending directly in
; the return.
; CHECK: @foo2
; CHECK-DAG: fre
; CHECK-DAG: fnmsub
; CHECK: fmadd
; CHECK-NEXT: fnmsub
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: blr

; Without the unsafe-math flag a real fdiv must be emitted.
; CHECK-SAFE: @foo2
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}
|
|
|
|
|
|
|
|
define float @goo2(float %a, float %b) nounwind {
  ; Plain single-precision division a / b, with no square root involved.
  %quot = fdiv float %a, %b
  ret float %quot

; With -enable-unsafe-fp-math: the reciprocal estimate fres and an fnmsubs
; in either order, then fmadds, fmuls and the return back to back.
; CHECK: @goo2
; CHECK-DAG: fres
; CHECK-DAG: fnmsubs
; CHECK: fmadds
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr

; Without the unsafe-math flag a real fdivs must be emitted.
; CHECK-SAFE: @goo2
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}
|
|
|
|
|
|
|
|
define <4 x float> @hoo2(<4 x float> %a, <4 x float> %b) nounwind {
  ; Element-wise a / b on a four-lane float vector.
  %quot = fdiv <4 x float> %a, %b
  ret <4 x float> %quot

; With -enable-unsafe-fp-math the vector reciprocal estimate vrefp must be
; used.
; CHECK: @hoo2
; CHECK: vrefp

; Without the unsafe-math flag vrefp must not appear before the return.
; CHECK-SAFE: @hoo2
; CHECK-SAFE-NOT: vrefp
; CHECK-SAFE: blr
}
|
|
|
|
|
|
|
|
define double @foo3(double %a) nounwind {
  ; Bare double-precision square root, with no division.
  %root = call double @llvm.sqrt.f64(double %a)
  ret double %root

; With -enable-unsafe-fp-math: an fcmpu comes first, then the frsqrte
; estimate and an fnmsub in either order, then one unbroken run of
; fmul/fmadd instructions before the return.
; CHECK: @foo3
; CHECK: fcmpu
; CHECK-DAG: frsqrte
; CHECK-DAG: fnmsub
; CHECK: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK: blr

; Without the unsafe-math flag a real fsqrt must be emitted.
; CHECK-SAFE: @foo3
; CHECK-SAFE: fsqrt
; CHECK-SAFE: blr
}
|
|
|
|
|
|
|
|
define float @goo3(float %a) nounwind {
  ; Bare single-precision square root, with no division.
  %root = call float @llvm.sqrt.f32(float %a)
  ret float %root

; With -enable-unsafe-fp-math: an fcmpu comes first, then frsqrtes and
; fnmsubs in either order, then consecutive fmuls/fmadds/fmuls/fmuls before
; the return.
; CHECK: @goo3
; CHECK: fcmpu
; CHECK-DAG: frsqrtes
; CHECK-DAG: fnmsubs
; CHECK: fmuls
; CHECK-NEXT: fmadds
; CHECK-NEXT: fmuls
; CHECK-NEXT: fmuls
; CHECK: blr

; Without the unsafe-math flag a real fsqrts must be emitted.
; CHECK-SAFE: @goo3
; CHECK-SAFE: fsqrts
; CHECK-SAFE: blr
}
|
|
|
|
|
|
|
|
define <4 x float> @hoo3(<4 x float> %a) nounwind {
  ; Element-wise square root of a four-lane float vector, with no division.
  %root = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  ret <4 x float> %root

; With -enable-unsafe-fp-math the vector estimate vrsqrtefp must be used,
; and a vcmpeqfp comparison must follow it in the output.
; CHECK: @hoo3
; CHECK: vrsqrtefp
; CHECK-DAG: vcmpeqfp

; Without the unsafe-math flag vrsqrtefp must not appear before the return.
; CHECK-SAFE: @hoo3
; CHECK-SAFE-NOT: vrsqrtefp
; CHECK-SAFE: blr
}
|
|
|
|
|