mac-rom/OS/FPUEmulation/InvTrig.a
Elliot Nunn 0ba83392d4 Bring in CubeE sources
Resource forks are included only for .rsrc files. These are DeRezzed into their data fork. 'ckid' resources, from the Projector VCS, are not included.

The Tools directory, containing mostly junk, is also excluded.
2017-09-20 18:04:16 +08:00

569 lines
15 KiB
Plaintext

;
; File: InvTrig.a
;
; Contains: Routines to emulate inverse trig functions
;
; Originally Written by: Motorola Inc.
; Adapted to Apple/MPW: Jon Okada
;
; Copyright: © 1990, 1991 by Apple Computer, Inc., all rights reserved.
;
; This file is used in these builds: Mac32
;
; Change History (most recent first):
;
; <2> 3/30/91 BG Rolling in Jon Okada's latest changes.
; <1> 12/14/90 BG First checked into TERROR/BBS.
; invtrig.a
; Based upon Motorola files 'sacos.sa', 'sasin.sa', and 'satan.sa'.
; sacos
; CHANGE LOG
; 04 Jan 91 JPO Moved constants PI and PIBY2 to file 'constants.a'.
;
*
* sacos.sa 3.1 12/10/90
*
* Description: The entry point sAcos computes the inverse cosine of
* an input argument; sAcosd does the same except for denormalized
* input.
*
* Input: Double-extended number X in location pointed to
* by address register a0.
*
* Output: The value arccos(X) returned in floating-point register Fp0.
*
* Accuracy and Monotonicity: The returned result is within 3 ulps in
* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
* result is subsequently rounded to double precision. The
* result is provably monotonic in double precision.
*
* Speed: The program sCOS takes approximately 310 cycles.
*
* Algorithm:
*
* ACOS
* 1. If |X| >= 1, go to 3.
*
* 2. (|X| < 1) Calculate acos(X) by
* z := (1-X) / (1+X)
* acos(X) = 2 * atan( sqrt(z) ).
* Exit.
*
* 3. If |X| > 1, go to 5.
*
* 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.
*
* 5. (|X| > 1) Generate an invalid operation by 0 * infinity.
* Exit.
*
* Copyright (C) Motorola, Inc. 1990
* All Rights Reserved
*
* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
* The copyright notice above does not evidence any
* actual or intended publication of such source code.
* SACOS IDNT 2,1 Motorola 040 Floating Point Software Package
sacosd:
*--ACOS(X) = PI/2 FOR DENORMALIZED X
fmove.l d1,fpcr ;...load user's rounding mode/precision
FMOVE.X PIBY2,FP0
bra t_frcinx
sacos:
FMOVE.X (a0),FP0 ;...LOAD INPUT
move.l (a0),d0 ;...pack exponent with upper 16 fraction
move.w 4(a0),d0
ANDI.L #$7FFFFFFF,D0
CMPI.L #$3FFF8000,D0
BGE.B ACOSBIG
*--THIS IS THE USUAL CASE, |X| < 1
*--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
FMOVE.S #"$3F800000",FP1
FADD.X FP0,FP1 ;...1+X
FNEG.X FP0 ;... -X
FADD.S #"$3F800000",FP0 ;...1-X
FDIV.X FP1,FP0 ;...(1-X)/(1+X)
FSQRT.X FP0 ;...SQRT((1-X)/(1+X))
fmovem.x fp0,(a0) ;...overwrite input
move.l d1,-(sp) ;save original users fpcr
clr.l d1
bsr satan ...ATAN(SQRT([1-X]/[1+X]))
fMOVE.L (sp)+,fpcr ;restore users exceptions
FADD.X FP0,FP0 ...2 * ATAN( STUFF )
bra t_frcinx
ACOSBIG:
FABS.X FP0
FCMP.S #"$3F800000",FP0
fbgt t_operr ;cause an operr exception
*--|X| = 1, ACOS(X) = 0 OR PI
move.l (a0),d0 ;...pack exponent with upper 16 fraction
move.w 4(a0),d0
CMP.L #0,D0 ;D0 has original exponent+fraction
BGT.B ACOSP1
*--X = -1
*Returns PI and inexact exception
FMOVE.X PI,FP0
FMOVE.L d1,FPCR
FADD.S #"$00800000",FP0 ;cause an inexact exception to be put
* ;into the 040 - will not trap until next
* ;fp inst.
bra t_frcinx
ACOSP1:
FMOVE.L d1,FPCR
FMOVE.S #"$00000000",FP0
rts ;Facos of +1 is exact
; sasin
; CHANGE LOG:
; 04 Jan 91 JPO Deleted constant PIBY2 (already exists in file 'constants.a').
;
*
* sasin.sa 3.1 12/10/90
*
* Description: The entry point sAsin computes the inverse sine of
* an input argument; sAsind does the same except for denormalized
* input.
*
* Input: Double-extended number X in location pointed to
* by address register a0.
*
* Output: The value arcsin(X) returned in floating-point register Fp0.
*
* Accuracy and Monotonicity: The returned result is within 3 ulps in
* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
* result is subsequently rounded to double precision. The
* result is provably monotonic in double precision.
*
* Speed: The program sASIN takes approximately 310 cycles.
*
* Algorithm:
*
* ASIN
* 1. If |X| >= 1, go to 3.
*
* 2. (|X| < 1) Calculate asin(X) by
* z := sqrt( [1-X][1+X] )
* asin(X) = atan( x / z ).
* Exit.
*
* 3. If |X| > 1, go to 5.
*
* 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.
*
* 5. (|X| > 1) Generate an invalid operation by 0 * infinity.
* Exit.
*
* Copyright (C) Motorola, Inc. 1990
* All Rights Reserved
*
* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
* The copyright notice above does not evidence any
* actual or intended publication of such source code.
* SASIN IDNT 2,1 Motorola 040 Floating Point Software Package
sasind:
*--ASIN(X) = X FOR DENORMALIZED X
bra t_extdnrm
sasin:
FMOVE.X (a0),FP0 ;...LOAD INPUT
move.l (a0),d0
move.w 4(a0),d0
ANDI.L #$7FFFFFFF,D0
CMPI.L #$3FFF8000,D0
BGE.B asinbig
*--THIS IS THE USUAL CASE, |X| < 1
*--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
FMOVE.S #"$3F800000",FP1
FSUB.X FP0,FP1 ;...1-X
fmovem.x fp2,-(a7)
FMOVE.S #"$3F800000",FP2
FADD.X FP0,FP2 ;...1+X
FMUL.X FP2,FP1 ;...(1+X)(1-X)
fmovem.x (a7)+,fp2
FSQRT.X FP1 ;...SQRT([1-X][1+X])
FDIV.X FP1,FP0 ;...X/SQRT([1-X][1+X])
fmovem.x fp0,(a0)
bsr.b satan
bra t_frcinx
asinbig:
FABS.X FP0 ...|X|
FCMP.S #"$3F800000",FP0
fbgt t_operr ;cause an operr exception
*--|X| = 1, ASIN(X) = +- PI/2.
FMOVE.X PIBY2,FP0
move.l (a0),d0
ANDI.L #$80000000,D0 ;...SIGN BIT OF X
ORI.L #$3F800000,D0 ;...+-1 IN SGL FORMAT
MOVE.L D0,-(sp) ;...push SIGN(X) IN SGL-FMT
FMOVE.L d1,FPCR
FMUL.S (sp)+,FP0
bra t_frcinx
; satan
; CHANGE LOG:
; 04 Jan 91 JPO Deleted constants BOUNDS1 and ONE because they are not used.
; Deleted constants PPIBY2, NPIBY2, and NTINY, changing code
; to refer to constants PIBY2, MPIBY2 (in file 'dofunc.a'),
; and PTINY, respectively. Moved constants ATANA1-ATANA3,
; ATANB1-ATANB6, ATANC1-ATANC5, PTINY, and table ATANTBL to
; file 'constants.a'. Changed names of local variables X,
; XDCARE, XFRAC, and XFRACLO to XATAN, XATANDC, XATANF, and
; XATANFL, respectively.
;
*
* satan.sa 3.1 12/10/90
*
* The entry point satan computes the arctagent of an
* input value. satand does the same except the input value is a
* denormalized number.
*
* Input: Double-extended value in memory location pointed to by address
* register a0.
*
* Output: Arctan(X) returned in floating-point register Fp0.
*
* Accuracy and Monotonicity: The returned result is within 2 ulps in
* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
* result is subsequently rounded to double precision. The
* result is provably monotonic in double precision.
*
* Speed: The program satan takes approximately 160 cycles for input
* argument X such that 1/16 < |X| < 16. For the other arguments,
* the program will run no worse than 10% slower.
*
* Algorithm:
* Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
*
* Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
* Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
* of X with a bit-1 attached at the 6-th bit position. Define u
* to be u = (X-F) / (1 + X*F).
*
* Step 3. Approximate arctan(u) by a polynomial poly.
*
* Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values
* calculated beforehand. Exit.
*
* Step 5. If |X| >= 16, go to Step 7.
*
* Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
*
* Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
* Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
*
* Copyright (C) Motorola, Inc. 1990
* All Rights Reserved
*
* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
* The copyright notice above does not evidence any
* actual or intended publication of such source code.
* satan IDNT 2,1 Motorola 040 Floating Point Software Package
;X equ FP_SCR1 ; deleted <1/4/91, JPO>
;XDCARE equ X+2
;XFRAC equ X+4
;XFRACLO equ X+8
XATAN equ FP_SCR1 ; <1/4/91, JPO>
XATANDC equ XATAN+2
XATANF equ XATAN+4
XATANFL equ XATAN+8
ATANF equ FP_SCR2
ATANFHI equ ATANF+4
ATANFLO equ ATANF+8
satand:
*--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
bra t_extdnrm
satan:
*--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
FMOVE.X (A0),FP0 ;...LOAD INPUT
MOVE.L (A0),D0
MOVE.W 4(A0),D0
FMOVE.X FP0,XATAN(a6) ; <1/4/91, JPO>
ANDI.L #$7FFFFFFF,D0
CMPI.L #$3FFB8000,D0 ;...|X| >= 1/16?
BGE.B ATANOK1
BRA.W ATANSM
ATANOK1:
CMPI.L #$4002FFFF,D0 ;...|X| < 16 ?
BLE.B ATANMAIN
BRA.W ATANBIG
*--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
*--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
*--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
*--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
*--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
*--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
*--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
*--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE
*--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
*--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE
*--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
*--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
*--WILL INVOLVE A VERY LONG POLYNOMIAL.
*--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
*--WE CHOSE F TO BE +-2^K * 1.BBBB1
*--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
*--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
*--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
*-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
ATANMAIN:
MOVE.W #$0000,XATANDC(a6) ;...CLEAN UP X JUST IN CASE <1/4/91, JPO>
ANDI.L #$F8000000,XATANF(a6) ;...FIRST 5 BITS <1/4/91, JPO>
ORI.L #$04000000,XATANF(a6) ;...SET 6-TH BIT TO 1 <1/4/91, JPO>
MOVE.L #$00000000,XATANFL(a6) ;...LOCATION OF X IS NOW F <1/4/91, JPO>
FMOVE.X FP0,FP1 ;...FP1 IS X
FMUL.X XATAN(a6),FP1 ;...FP1 IS X*F, NOTE THAT X*F > 0 <1/4/91, JPO>
FSUB.X XATAN(a6),FP0 ;...FP0 IS X-F <1/4/91, JPO>
FADD.S #"$3F800000",FP1 ...FP1 IS 1 + X*F
FDIV.X FP1,FP0 ;...FP0 IS U = (X-F)/(1+X*F)
*--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
*--CREATE ATAN(F) AND STORE IT IN ATANF, AND
*--SAVE REGISTERS FP2.
MOVE.L d2,-(a7) ;...SAVE d2 TEMPORARILY
MOVE.L d0,d2 ;...THE EXPO AND 16 BITS OF X
ANDI.L #$00007800,d0 ;...4 VARYING BITS OF F'S FRACTION
ANDI.L #$7FFF0000,d2 ;...EXPONENT OF F
SUBI.L #$3FFB0000,d2 ;...K+4
ASR.L #1,d2
ADD.L d2,d0 ;...THE 7 BITS IDENTIFYING F
ASR.L #7,d0 ;...INDEX INTO TBL OF ATAN(|F|)
LEA ATANTBL,a1
ADDA.L d0,a1 ;...ADDRESS OF ATAN(|F|)
MOVE.L (a1)+,ATANF(a6)
MOVE.L (a1)+,ATANFHI(a6)
MOVE.L (a1)+,ATANFLO(a6) ;...ATANF IS NOW ATAN(|F|)
MOVE.L XATAN(a6),d0 ;...LOAD SIGN AND EXPO. AGAIN <1/4/91, JPO>
ANDI.L #$80000000,d0 ;...SIGN(F)
OR.L d0,ATANF(a6) ;...ATANF IS NOW SIGN(F)*ATAN(|F|)
MOVE.L (a7)+,d2 ;...RESTORE d2
*--THAT'S ALL I HAVE TO DO FOR NOW,
*--BUT ALAS, THE DIVIDE IS STILL CRANKING!
*--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
*--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
*--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
*--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
*--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
*--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
*--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
FMOVE.X FP0,FP1
FMUL.X FP1,FP1
FMOVE.D ATANA3,FP2
FADD.X FP1,FP2 ;...A3+V
FMUL.X FP1,FP2 ;...V*(A3+V)
FMUL.X FP0,FP1 ;...U*V
FADD.D ATANA2,FP2 ;...A2+V*(A3+V)
FMUL.D ATANA1,FP1 ;...A1*U*V
FMUL.X FP2,FP1 ;...A1*U*V*(A2+V*(A3+V))
FADD.X FP1,FP0 ;...ATAN(U), FP1 RELEASED
FMOVE.L d1,FPCR ;restore users exceptions
FADD.X ATANF(a6),FP0 ;...ATAN(X)
bra t_frcinx
ATANBORS:
*--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
*--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
CMPI.L #$3FFF8000,d0
BGT.W ATANBIG ...I.E. |X| >= 16
ATANSM:
*--|X| <= 1/16
*--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
*--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
*--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] )
*--WHERE Y = X*X, AND Z = Y*Y.
CMPI.L #$3FD78000,d0
BLT.B ATANTINY
*--COMPUTE POLYNOMIAL
FMUL.X FP0,FP0 ...FP0 IS Y = X*X
MOVE.W #$0000,XATANDC(a6) ; <1/4/91, JPO>
FMOVE.X FP0,FP1
FMUL.X FP1,FP1 ;...FP1 IS Z = Y*Y
FMOVE.D ATANB6,FP2
FMOVE.D ATANB5,FP3
FMUL.X FP1,FP2 ;...Z*B6
FMUL.X FP1,FP3 ;...Z*B5
FADD.D ATANB4,FP2 ;...B4+Z*B6
FADD.D ATANB3,FP3 ;...B3+Z*B5
FMUL.X FP1,FP2 ;...Z*(B4+Z*B6)
FMUL.X FP3,FP1 ;...Z*(B3+Z*B5)
FADD.D ATANB2,FP2 ;...B2+Z*(B4+Z*B6)
FADD.D ATANB1,FP1 ;...B1+Z*(B3+Z*B5)
FMUL.X FP0,FP2 ;...Y*(B2+Z*(B4+Z*B6))
FMUL.X XATAN(a6),FP0 ;...X*Y <1/4/91, JPO>
FADD.X FP2,FP1 ;...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
FMUL.X FP1,FP0 ;...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
FMOVE.L d1,FPCR ;restore users exceptions
FADD.X XATAN(a6),FP0 ; <1/4/91, JPO>
bra t_frcinx
ATANTINY:
*--|X| < 2^(-40), ATAN(X) = X
MOVE.W #$0000,XATANDC(a6) ; <1/4/91, JPO>
FMOVE.L d1,FPCR ;restore users exceptions
FMOVE.X XATAN(a6),FP0 ;last inst - possible exception set <1/4/91, JPO>
bra t_frcinx
ATANBIG:
*--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
*--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
CMPI.L #$40638000,d0
BGT.W ATANHUGE
*--APPROXIMATE ATAN(-1/X) BY
*--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
*--THIS CAN BE RE-WRITTEN AS
*--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
FMOVE.S #"$BF800000",FP1 ;...LOAD -1
FDIV.X FP0,FP1 ;...FP1 IS -1/X
*--DIVIDE IS STILL CRANKING
FMOVE.X FP1,FP0 ;...FP0 IS X'
FMUL.X FP0,FP0 ;...FP0 IS Y = X'*X'
FMOVE.X FP1,XATAN(a6) ;...X IS REALLY X' <1/4/91, JPO>
FMOVE.X FP0,FP1
FMUL.X FP1,FP1 ;...FP1 IS Z = Y*Y
FMOVE.D ATANC5,FP3
FMOVE.D ATANC4,FP2
FMUL.X FP1,FP3 ;...Z*C5
FMUL.X FP1,FP2 ;...Z*B4
FADD.D ATANC3,FP3 ;...C3+Z*C5
FADD.D ATANC2,FP2 ;...C2+Z*C4
FMUL.X FP3,FP1 ;...Z*(C3+Z*C5), FP3 RELEASED
FMUL.X FP0,FP2 ;...Y*(C2+Z*C4)
FADD.D ATANC1,FP1 ;...C1+Z*(C3+Z*C5)
FMUL.X XATAN(a6),FP0 ;...X'*Y <1/4/91, JPO>
FADD.X FP2,FP1 ;...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
FMUL.X FP1,FP0 ;...X'*Y*([B1+Z*(B3+Z*B5)]
* ;... +[Y*(B2+Z*(B4+Z*B6))])
FADD.X XATAN(a6),FP0 ; <1/4/91, JPO>
FMOVE.L d1,FPCR ;restore users exceptions
btst.b #7,(a0)
beq.b pos_big
neg_big:
; FADD.X NPIBY2,FP0 ; deleted <1/4/91, JPO>
FADD.X MPIBY2,FP0 ; <1/4/91, JPO>
bra t_frcinx
pos_big:
; FADD.X PPIBY2,FP0 ; deleted <1/4/91, JPO>
FADD.X PIBY2,FP0 ; <1/4/91, JPO>
bra t_frcinx
ATANHUGE:
*--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
btst.b #7,(a0)
beq.b pos_huge
neg_huge:
; FMOVE.X NPIBY2,fp0 ; deleted <1/4/91, JPO>
FMOVE.X MPIBY2,fp0 ; <1/4/91, JPO>
fmove.l d1,fpcr
; fsub.x NTINY,fp0 ; deleted <1/4/91, JPO>
fadd.x PTINY,fp0 ; <1/4/91, JPO>
bra t_frcinx
pos_huge:
; FMOVE.X PPIBY2,fp0 ; deleted <1/4/91, JPO>
FMOVE.X PIBY2,fp0 ; <1/4/91, JPO>
fmove.l d1,fpcr
fsub.x PTINY,fp0
bra t_frcinx