Compare commits

...

2 Commits

Author SHA1 Message Date
Stephen Heumann b21a51ba33 Optimize lgamma.
This includes adjustments that make it a little faster for most input values, but especially for |x| < 7. The impact on accuracy should be minimal.
2023-05-22 18:05:15 -05:00
Stephen Heumann 97a295522c Implement lgamma (C99).
This uses an approximation based on the Stirling series for most positive values, but uses separate rational approximations for greater accuracy near 1 and 2. A reflection formula is used for negative values.
2023-05-21 18:24:01 -05:00
2 changed files with 372 additions and 0 deletions

367
math2.asm
View File

@ -1906,6 +1906,373 @@ ret creturn 2:x return it
rtl
end
****************************************************************
*
* double lgamma(double x);
*
* Computes the natural logarithm of the absolute value of the
* gamma function of x.
*
* The rational approximations used near 1 and 2 are from
* W. J. Cody and K. E. Hillstrom, "Chebyshev Approximations
* for the Natural Logarithm of the Gamma Function".
* For other positive x values, a computation based on
* Stirling's formula is used.
*
****************************************************************
*
lgamma start
lgammaf entry
lgammal entry
using MathCommon2
csubroutine (10:x),0
phb
phk
plb
pha save env & set to default
tsc
inc a
pea 0
pha
FPROCENTRY
stz reflect assume no reflection
; For x < 0, lgamma(x) = ln(pi) - ln(abs(x*sin(pi*x))) - lgamma(-x)
lda x+8 if x is negative then
jpl positive
lda x if x != -0 and x != -inf then
ora x+2
ora x+4
ora x+6
jeq abs_x
inc reflect flag to do reflection
ldx #8 neg_adj := x
lp1 lda x,X
sta neg_adj,X
dex
dex
bpl lp1
ph4 #two neg_adj := neg_adj REM 2
ph4 #neg_adj
FREMI
ph4 #pi neg_adj := sin(neg_adj*pi)
ph4 #neg_adj
FMULX
ph4 #neg_adj
FSINX
tdc neg_adj := neg_adj * x
clc
adc #x
pea 0
pha
ph4 #neg_adj
FMULX
asl neg_adj+8 neg_adj := abs(neg_adj)
lsr neg_adj+8
ph4 #neg_adj neg_adj := ln(neg_adj)
FLNX
ph4 #ln_pi neg_adj := neg_adj - ln(pi)
ph4 #neg_adj
FSUBX
lda neg_adj+8 if neg_adj is inf/nan then
asl a
cmp #32767*2
bne abs_x
ldx #8 z := neg_adj
lp2 lda neg_adj,X
sta z,X
dex
dex
bpl lp2
brl ret_negz return -z
abs_x asl x+8 x := abs(x)
lsr x+8
positive ldx #8 t1 := x
lp3 lda x,X
sta t1,X
dex
dex
bpl lp3
ph4 #rat2_hi if x is near 1 or 2 then
ph4 #t1
FCMPS
jvc not_near_1_or_2
; Rational approximation for x near 2
ph4 #rat2_lo if x is near 2 then
ph4 #t1
FCMPS
bvs small
ph4 #z z := P2(t1)
pea 7
ph4 #P2
jsl poly
ph4 #y y := Q2(t1)
pea 7
ph4 #Q2
jsl poly
ph4 #two N := 2
bra finish_rational_approx
; Rational approximation for x near 1
small ph4 #rat1_hi else if x is near 1 then
ph4 #t1
FCMPS
jvc not_near_1_or_2
ph4 #rat1_lo
ph4 #t1
FCMPS
bvs not_near_1_or_2
ph4 #z z := P1(t1)
pea 7
ph4 #P1
jsl poly
ph4 #y y := Q1(t1)
pea 7
ph4 #Q1
jsl poly
ph4 #one N := 1
finish_rational_approx anop
ph4 #t1 t1 := x - N
FSUBI
ph4 #y z := z/y
ph4 #z
FDIVX
ph4 #t1 z := z * t1
ph4 #z
FMULX
brl chk_reflect goto chk_reflect
; Calculate based on Stirling's formula for other x values
not_near_1_or_2 anop
stz scaled assume no scaling used
stz z z := 1.0
stz z+2
stz z+4
lda #$8000
sta z+6
lda #16383
sta z+8
chk_big ph4 #cutoff while x < 9.41796875 do
tdc
clc
adc #x
pea 0
pha
FCMPS
bvc stirling
inc scaled flag that scaling was used
tdc z := z * x
clc
adc #x
pea 0
pha
ph4 #z
FMULX
ph4 #one x := x + 1
tdc
clc
adc #x
pea 0
pha
FADDI
bra chk_big
stirling ldx #8 t1 := x
lp4 lda x,X y := x
sta t1,X
sta y,X
dex
dex
bpl lp4
ph4 #t1 t1 := ln(t1)
FLNX
ph4 #one_half y := y - 1/2
ph4 #y
FSUBS
ph4 #t1 y := t1 * y
ph4 #y
FMULX
lda y+8 if y is not inf/nan then
asl a
cmp #32767*2
beq stirling_poly
tdc y := y - x
clc
adc #x
pea 0
pha
ph4 #y
FSUBX
ph4 #half_ln_2pi y := y + 0.5*ln(2pi)
ph4 #y
FADDX
lda scaled if scaled then
beq stirling_poly
ph4 #z z := ln(z)
FLNX
ph4 #z y := y - z
ph4 #y
FSUBX
stirling_poly anop
ldx #8 t1 := x
lp5 lda x,X
sta t1,X
dex
dex
bpl lp5
pea -2 t1 := 1/t1^2
ph4 #t1
FXPWRI
ph4 #z z := P_stirling(t1)
pea 8
ph4 #P_stirling
jsl poly
tdc z := z / x
clc
adc #x
pea 0
pha
ph4 #z
FDIVX
ph4 #y z := y + z
ph4 #z
FADDX
chk_reflect anop
lda reflect if reflection is needed then
beq done
ph4 #neg_adj z := z + neg_adj
ph4 #z
FADDX
ret_negz lda z+8 z := -z
eor #$8000
sta z+8
done FPROCEXIT restore env & raise any new exceptions
plb
creturn 10:z return a pointer to the result
; constants
cutoff dc f'9.41796875' above this, just use Stirling's formula
;low/high limits for use of rational approximations near 1 and 2
rat1_lo dc f'0.97265625'
rat1_hi dc f'1.02880859375'
rat2_lo dc f'1.76220703125'
rat2_hi dc f'2.208984375'
one_half dc f'0.5'
half_ln_2pi dc e'0.9189385332046727417803297'
one dc i2'1'
two dc i2'2'
ln_pi dc e'1.14472988584940017414342735'
pi dc e'3.1415926535897932384626433'
; coefficients for rational approximation, 0.5 <= x <= 1.5
P1 dc e'4.120843185847770031e00'
dc e'8.568982062831317339e01'
dc e'2.431752435244210223e02'
dc e'-2.617218583856145190e02'
dc e'-9.222613728801521582e02'
dc e'-5.176383498023217924e02'
dc e'-7.741064071332953034e01'
dc e'-2.208843997216182306e00'
Q1 dc e'1.000000000000000000e00'
dc e'4.564677187585907957e01'
dc e'3.778372484823942081e02'
dc e'9.513235976797059772e02'
dc e'8.460755362020782006e02'
dc e'2.623083470269460180e02'
dc e'2.443519662506311704e01'
dc e'4.097792921092615065e-01'
; coefficients for rational approximation, 1.5 <= x <= 4.0
P2 dc e'5.1550576176408171704e00'
dc e'3.7751067979721702241e02'
dc e'5.2689832559149812458e03'
dc e'1.9553605540630449846e04'
dc e'1.2043173809871640151e04'
dc e'-2.0648294205325283281e04'
dc e'-1.5086302287667250272e04'
dc e'-1.5138318341150667785e03'
Q2 dc e'1.0000000000000000000e00'
dc e'1.2890931890129576873e02'
dc e'3.0399030414394398824e03'
dc e'2.2029562144156636889e04'
dc e'5.7120255396025029854e04'
dc e'5.2622863838411992470e04'
dc e'1.4402090371700852304e04'
dc e'6.9832741405735102159e02'
; coefficients for Stirling series approximation
P_stirling anop
dc e'0.1796443723688305731649385'
dc e'-0.02955065359477124183006536'
dc e'0.006410256410256410256410256'
dc e'-0.001917526917526917526917527'
dc e'0.0008417508417508417508417508'
dc e'-0.0005952380952380952380952381'
dc e'0.0007936507936507936507936508'
dc e'-0.002777777777777777777777778'
dc e'0.08333333333333333333333333'
; temporaries/return values
y ds 10
z ds 10
neg_adj ds 10 adjustment for negative x
reflect ds 2 reflection flag
scaled ds 2 scaling flag
end
****************************************************************
*
* long long llrint(double x);

View File

@ -748,3 +748,8 @@
.c
~restm
mend
macro
&l jvc &bp
&l bvs *+5
brl &bp
mend