supermario/base/SuperMarioProj.1994-02-09/OS/FPUEmulation/Trig.a

1188 lines
32 KiB
Plaintext
Raw Normal View History

2019-06-29 15:17:50 +00:00
;
; File: Trig.a
;
; Contains: Routines to emulate trigonometric functions
;
; Originally Written by: Motorola Inc.
; Adapted to Apple/MPW: Jon Okada
;
; Copyright: © 1990, 1991 by Apple Computer, Inc., all rights reserved.
;
; This file is used in these builds: Mac32
;
; Change History (most recent first):
;
; <3+> 6/24/91 BG Modified trig argument reduction routine per Motorola
; version 2.0 to fix sign bug for FCOS of large argument
; (order of 10**8).
; <3> 5/24/91 BG Cleanup and optimization to the Sin and Tan routines.
; <2> 3/30/91 BG Rolling in Jon Okada's latest changes.
; <1> 12/14/90 BG First checked into TERROR/BBS.
; trig.a
; Based upon Motorola files 'ssin.sa', 'sto_res.sa', and 'stan.sa'.
; ssin
; CHANGE LOG:
; 07 Jan 91 JPO Deleted constants BOUNDS1, INVTWOPI, TWOPI1, and
; TWOPI2 (not referenced). Moved constants to
; file 'constants.a'. Deleted variable equate for
; XFRAC (not referenced). Renamed variable labels X,
; XDCARE, and N to XSIN, XSINDC, and NSIN, respectively.
; Deleted labels "NODD" and "c_is_fp3" (not referenced).
; Renamed labels REDUCEX, LOOP, WORK, LASTLOOP, and
; RESTORE to SREDUCEX, SLOOP, SWORK, SLASTLOOP, and
; SRESTORE, respectively. Appended routine "sto_cos"
; from Motorola file 'sto_res.sa'.
; 07 May 91 JPO Renamed variable NSIN to NTRIG. Deleted variable NTAN
; and changed all references to it to references to NTRIG.
; Removed routines "SINBORS", "SCBORS", and "TANBORS"
; (not referenced). Removed routine "SREDUCEX" and converted
; all branches to it to subroutine calls to "REDUCEX".
; Converted routine "REDUCEX" to a subroutine and all
; branches to it to subroutine calls. Added code in
; "REDUCEX" to do a single remainder step if the input
; is very large in order to prevent unwanted overflow.
; 13 Jun 91 JPO Modified trig argument reduction routine per Motorola
; version 2.0 to fix sign bug for FCOS of large argument
; (order of 10**8).
;
*
* ssin.sa 3.1 12/10/90
*
* The entry point sSIN computes the sine of an input argument
* sCOS computes the cosine, and sSINCOS computes both. The
* corresponding entry points with a "d" computes the same
* corresponding function values for denormalized inputs.
*
* Input: Double-extended number X in location pointed to
* by address register a0.
*
* Output: The funtion value sin(X) or cos(X) returned in Fp0 if SIN or
* COS is requested. Otherwise, for SINCOS, sin(X) is returned
* in Fp0, and cos(X) is returned in Fp1.
*
* Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
*
* Accuracy and Monotonicity: The returned result is within 1 ulp in
* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
* result is subsequently rounded to double precision. The
* result is provably monotonic in double precision.
*
* Speed: The programs sSIN and sCOS take approximately 150 cycles for
* input argument X such that |X| < 15Pi, which is the the usual
* situation. The speed for sSINCOS is approximately 190 cycles.
*
* Algorithm:
*
* SIN and COS:
* 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
*
* 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
*
* 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
* k = N mod 4, so in particular, k = 0,1,2,or 3. Overwirte
* k by k := k + AdjN.
*
* 4. If k is even, go to 6.
*
* 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
* where cos(r) is approximated by an even polynomial in r,
* 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r.
* Exit.
*
* 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
* where sin(r) is approximated by an odd polynomial in r
* r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r.
* Exit.
*
* 7. If |X| > 1, go to 9.
*
* 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
*
* 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
*
* SINCOS:
* 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
*
* 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
* k = N mod 4, so in particular, k = 0,1,2,or 3.
*
* 3. If k is even, go to 5.
*
* 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
* j1 exclusive or with the l.s.b. of k.
* sgn1 := (-1)**j1, sgn2 := (-1)**j2.
* SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
* sin(r) and cos(r) are computed as odd and even polynomials
* in r, respectively. Exit
*
* 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
* SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
* sin(r) and cos(r) are computed as odd and even polynomials
* in r, respectively. Exit
*
* 6. If |X| > 1, go to 8.
*
* 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
*
* 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
*
* Copyright (C) Motorola, Inc. 1990
* All Rights Reserved
*
* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
* The copyright notice above does not evidence any
* actual or intended publication of such source code.
* SSIN IDNT 2,1 Motorola 040 Floating Point Software Package
INARG equ FP_SCR4
;X equ FP_SCR5 ; Renamed <1/7/91, JPO>
;XDCARE equ X+2 ; Renamed <1/7/91, JPO>
;XFRAC equ X+4 ; Deleted <1/7/91, JPO>
XSIN EQU FP_SCR5 ; <1/7/91, JPO>
XSINDC EQU XSIN+2
RPRIME equ FP_SCR1
SPRIME equ FP_SCR2
POSNEG1 equ L_SCR1
TWOTO63 equ L_SCR1
ENDFLAG equ L_SCR2
;N equ L_SCR2 ; Renamed <1/7/91, JPO>
;NSIN equ L_SCR2 ; <1/7/91, JPO> - RENAMED <T3>
NTRIG equ L_SCR2 ; <5/7/91, JPO> <T3>
ADJN equ L_SCR3
ssind:
*--SIN(X) = X FOR DENORMALIZED X
bra t_extdnrm
scosd:
*--COS(X) = 1 FOR DENORMALIZED X
FMOVE.S #"$3F800000",FP0
*
* 9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
*
fmove.l #0,fpsr
*
bra t_frcinx
ssin:
*--SET ADJN TO 0
MOVE.L #0,ADJN(a6)
BRA.B SINBGN
scos:
*--SET ADJN TO 1
MOVE.L #1,ADJN(a6)
SINBGN:
*--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
FMOVE.X (a0),FP0 ...LOAD INPUT
MOVE.L (A0),D0
MOVE.W 4(A0),D0
FMOVE.X FP0,XSIN(a6) ; <1/7/91, JPO>
ANDI.L #$7FFFFFFF,D0 ...COMPACTIFY X
CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)?
BGE.B SOK1
BRA.W SINSM
SOK1:
CMPI.L #$4004BC7E,D0 ...|X| < 15 PI?
BLT.B SINMAIN
; BRA.W SREDUCEX ; label renamed <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
BSR.W REDUCEX ; NEW subroutine <5/7/91, JPO> <T3>
BRA.B SINCONT
SINMAIN:
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
FMOVE.X FP0,FP1
FMUL.D TWOBYPI,FP1 ...X*2/PI
*--HIDE THE NEXT THREE INSTRUCTIONS
LEA PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32
*--FP1 IS NOW READY
; FMOVE.L FP1,NSIN(a6) ...CONVERT TO INTEGER <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
FMOVE.L FP1,NTRIG(a6) ; variable RENAMED <5/7/91, JPO> <T3>
; MOVE.L NSIN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
MOVE.L NTRIG(a6),D0 ; variable RENAMED <5/7/91, JPO> <T3>
ASL.L #4,D0
ADDA.L D0,A1 ...A1 IS THE ADDRESS OF N*PIBY2
* ...WHICH IS IN TWO PIECES Y1 & Y2
FSUB.X (A1)+,FP0 ...X-Y1
*--HIDE THE NEXT ONE
FSUB.S (A1),FP0 ...FP0 IS R = (X-Y1)-Y2
SINCONT:
*--continuation from REDUCEX
*--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
; MOVE.L NSIN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
MOVE.L NTRIG(a6),D0 ; variable RENAMED <5/7/91, JPO> <T3>
ADD.L ADJN(a6),D0 ...SEE IF D0 IS ODD OR EVEN
ROR.L #1,D0 ...D0 WAS ODD IFF D0 IS NEGATIVE
; CMPI.L #0,D0 ; DELETED (unnecessary) <5/7/91, JPO> <T3>
; BLT.B COSPOLY ; DELETED <5/7/91, JPO> <T3>
BMI.B COSPOLY ; <5/7/91, JPO> <T3>
SINPOLY:
*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
*--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
*--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
*--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
*--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
*--WHERE T=S*S.
*--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
*--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
FMOVE.X FP0,XSIN(a6) ...X IS R <1/7/91, JPO>
FMUL.X FP0,FP0 ...FP0 IS S
*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
FMOVE.D SINA7,FP3
FMOVE.D SINA6,FP2
*--FP0 IS NOW READY
FMOVE.X FP0,FP1
FMUL.X FP1,FP1 ...FP1 IS T
*--HIDE THE NEXT TWO WHILE WAITING FOR FP1
ROR.L #1,D0
ANDI.L #$80000000,D0
* ...LEAST SIG. BIT OF D0 IN SIGN POSITION
EOR.L D0,XSIN(a6) ...X IS NOW R'= SGN*R <1/7/91, JPO>
FMUL.X FP1,FP3 ...TA7
FMUL.X FP1,FP2 ...TA6
FADD.D SINA5,FP3 ...A5+TA7
FADD.D SINA4,FP2 ...A4+TA6
FMUL.X FP1,FP3 ...T(A5+TA7)
FMUL.X FP1,FP2 ...T(A4+TA6)
FADD.D SINA3,FP3 ...A3+T(A5+TA7)
FADD.X SINA2,FP2 ...A2+T(A4+TA6)
FMUL.X FP3,FP1 ...T(A3+T(A5+TA7))
FMUL.X FP0,FP2 ...S(A2+T(A4+TA6))
FADD.X SINA1,FP1 ...A1+T(A3+T(A5+TA7))
FMUL.X XSIN(a6),FP0 ...R'*S <1/7/91, JPO>
FADD.X FP2,FP1 ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
*--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING
FMUL.X FP1,FP0 ...SIN(R')-R'
*--FP1 RELEASED.
FMOVE.L d1,FPCR ;restore users exceptions
FADD.X XSIN(a6),FP0 ;last inst - possible exception set <1/7/91, JPO>
bra t_frcinx
COSPOLY:
*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
*--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
*--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
*--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
*--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
*--WHERE T=S*S.
*--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
*--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
*--AND IS THEREFORE STORED AS SINGLE PRECISION.
FMUL.X FP0,FP0 ...FP0 IS S
*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
FMOVE.D COSB8,FP2
FMOVE.D COSB7,FP3
*--FP0 IS NOW READY
FMOVE.X FP0,FP1
FMUL.X FP1,FP1 ...FP1 IS T
*--HIDE THE NEXT TWO WHILE WAITING FOR FP1
FMOVE.X FP0,XSIN(a6) ...X IS S <1/7/91, JPO>
ROR.L #1,D0
ANDI.L #$80000000,D0
* ...LEAST SIG. BIT OF D0 IN SIGN POSITION
FMUL.X FP1,FP2 ...TB8
*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
EOR.L D0,XSIN(a6) ...X IS NOW S'= SGN*S <1/7/91, JPO>
ANDI.L #$80000000,D0
FMUL.X FP1,FP3 ...TB7
*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
ORI.L #$3F800000,D0 ...D0 IS SGN IN SINGLE
MOVE.L D0,POSNEG1(a6)
FADD.D COSB6,FP2 ...B6+TB8
FADD.D COSB5,FP3 ...B5+TB7
FMUL.X FP1,FP2 ...T(B6+TB8)
FMUL.X FP1,FP3 ...T(B5+TB7)
FADD.D COSB4,FP2 ...B4+T(B6+TB8)
FADD.X COSB3,FP3 ...B3+T(B5+TB7)
FMUL.X FP1,FP2 ...T(B4+T(B6+TB8))
FMUL.X FP3,FP1 ...T(B3+T(B5+TB7))
FADD.X COSB2,FP2 ...B2+T(B4+T(B6+TB8))
FADD.S COSB1,FP1 ...B1+T(B3+T(B5+TB7))
FMUL.X FP2,FP0 ...S(B2+T(B4+T(B6+TB8)))
*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
*--FP2 RELEASED.
FADD.X FP1,FP0
*--FP1 RELEASED
FMUL.X XSIN(a6),FP0 ; <1/7/91, JPO>
FMOVE.L d1,FPCR ;restore users exceptions
FADD.S POSNEG1(a6),FP0 ;last inst - possible exception set
bra t_frcinx
;SINBORS: ; routine DELETED - <5/7/91, JPO> <T3>
*--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
*--IF |X| < 2**(-40), RETURN X OR 1.
; CMPI.L #$3FFF8000,D0 <T3>
; BGT.B SREDUCEX ; label renamed <1/7/91, JPO> <T3>
SINSM:
MOVE.L ADJN(a6),D0
; CMPI.L #0,D0 ; DELETED (unnecessary) <5/7/91, JPO> <T3>
BGT.B COSTINY
SINTINY:
MOVE.W #$0000,XSINDC(a6) ...JUST IN CASE <1/7/91, JPO>
FMOVE.L d1,FPCR ;restore users exceptions
FMOVE.X XSIN(a6),FP0 ;last inst - possible exception set <1/7/91, JPO>
bra t_frcinx
COSTINY:
FMOVE.S #"$3F800000",FP0
FMOVE.L d1,FPCR ;restore users exceptions
FSUB.S #"$00800000",FP0 ;last inst - possible exception set
bra t_frcinx
; Routine "SREDUCEX" is DELETED and replaced by subroutine "REDUCEX" <T3> thru next <T3>
; (below) <5/7/91, JPO>.
;SREDUCEX: ; label renamed <1/7/91, JPO>
*--WHEN SREDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
; FMOVEM.X FP2-FP5,-(A7) ...save FP2 through FP5
; MOVE.L D2,-(A7)
; FMOVE.S #"$00000000",FP1
*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
*--integer quotient will be stored in N
*--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
;SLOOP: ; label renamed <1/7/91, JPO>
; FMOVE.X FP0,INARG(a6) ...+-2**K * F, 1 <= F < 2
; MOVE.W INARG(a6),D0
; MOVE.L D0,A1 ...save a copy of D0
; ANDI.L #$00007FFF,D0
; SUBI.L #$00003FFF,D0 ...D0 IS K
; CMPI.L #28,D0
; BLE.B SLASTLOOP ; label renamed <1/7/91, JPO>
;CONTLOOP: ; label not referenced <1/7/91, JPO>
; SUBI.L #28,D0 ...D0 IS L := K-28
; MOVE.L #0,ENDFLAG(a6)
; BRA.B SWORK ; label renamed <1/7/91, JPO>
;SLASTLOOP: ; label renamed <1/7/91, JPO>
; CLR.L D0 ...D0 IS L := 0
; MOVE.L #1,ENDFLAG(a6)
;SWORK: ; label renamed <1/7/91, JPO>
*--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
*--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
*--2**L * (PIby2_1), 2**L * (PIby2_2)
; MOVE.L #$00003FFE,D2 ...BIASED EXPO OF 2/PI
; SUB.L D0,D2 ...BIASED EXPO OF 2**(-L)*(2/PI)
; MOVE.L #$A2F9836E,FP_SCR1+4(a6)
; MOVE.L #$4E44152A,FP_SCR1+8(a6)
; MOVE.W D2,FP_SCR1(a6) ...FP_SCR1 is 2**(-L)*(2/PI)
; FMOVE.X FP0,FP2
; FMUL.X FP_SCR1(a6),FP2
*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
*--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
*--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
*--US THE DESIRED VALUE IN FLOATING POINT.
*--HIDE SIX CYCLES OF INSTRUCTION
; MOVE.L A1,D2
; SWAP D2
; ANDI.L #$80000000,D2
; ORI.L #$5F000000,D2 ...D2 IS SIGN(INARG)*2**63 IN SGL
; MOVE.L D2,TWOTO63(a6)
; MOVE.L D0,D2
; ADDI.L #$00003FFF,D2 ...BIASED EXPO OF 2**L * (PI/2)
*--FP2 IS READY
; FADD.S TWOTO63(a6),FP2 ...THE FRACTIONAL PART OF FP1 IS ROUNDED
*--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
; MOVE.W D2,FP_SCR2(a6)
; CLR.W FP_SCR2+2(a6)
; MOVE.L #$C90FDAA2,FP_SCR2+4(a6)
; CLR.L FP_SCR2+8(a6) ...FP_SCR2 is 2**(L) * Piby2_1
*--FP2 IS READY
; FSUB.S TWOTO63(a6),FP2 ...FP2 is N
; ADDI.L #$00003FDD,D0
; MOVE.W D0,FP_SCR3(a6)
; CLR.W FP_SCR3+2(a6)
; MOVE.L #$85A308D3,FP_SCR3+4(a6)
; CLR.L FP_SCR3+8(a6) ...FP_SCR3 is 2**(L) * Piby2_2
; MOVE.L ENDFLAG(a6),D0
*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
*--P2 = 2**(L) * Piby2_2
; FMOVE.X FP2,FP4
; FMul.X FP_SCR2(a6),FP4 ...W = N*P1
; FMove.X FP2,FP5
; FMul.X FP_SCR3(a6),FP5 ...w = N*P2
; FMove.X FP4,FP3
*--we want P+p = W+w but |p| <= half ulp of P
*--Then, we need to compute A := R-P and a := r-p
; FAdd.X FP5,FP3 ...FP3 is P
; FSub.X FP3,FP4 ...W-P
; FSub.X FP3,FP0 ...FP0 is A := R - P
; FAdd.X FP5,FP4 ...FP4 is p = (W-P)+w
; FMove.X FP0,FP3 ...FP3 A
; FSub.X FP4,FP1 ...FP1 is a := r - p
*--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
*--|r| <= half ulp of R.
; FAdd.X FP1,FP0 ...FP0 is R := A+a
*--No need to calculate r if this is the last loop
; CMPI.L #0,D0
; BGT.B SRESTORE ; label renamed <1/7/91, JPO>
*--Need to calculate r
; FSub.X FP0,FP3 ...A-R
; FAdd.X FP3,FP1 ...FP1 is r := (A-R)+a
; BRA.W SLOOP
;SRESTORE: ; label renamed <1/7/91, JPO>
; FMOVE.L FP2,NSIN(a6)
; MOVE.L (A7)+,D2
; FMOVEM.X (A7)+,FP2-FP5
; MOVE.L ADJN(a6),D0
; CMPI.L #4,D0
; BLT.W SINCONT
; BRA.B SCCONT <T3>
ssincosd:
*--SIN AND COS OF X FOR DENORMALIZED X
FMOVE.S #"$3F800000",FP1
bsr sto_cos ;store cosine result
bra t_extdnrm
ssincos:
*--SET ADJN TO 4
MOVE.L #4,ADJN(a6)
FMOVE.X (a0),FP0 ...LOAD INPUT
MOVE.L (A0),D0
MOVE.W 4(A0),D0
FMOVE.X FP0,XSIN(a6) ; <1/7/91, JPO>
ANDI.L #$7FFFFFFF,D0 ...COMPACTIFY X
CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)?
BGE.B SCOK1
BRA.W SCSM
SCOK1:
CMPI.L #$4004BC7E,D0 ...|X| < 15 PI?
BLT.B SCMAIN
; BRA.W SREDUCEX ; label renamed - DELETED <5/7/91, JPO> <T3>
BSR.W REDUCEX ; NEW subroutine <5/7/91, JPO> <T3>
BRA.B SCCONT ; continue below <5/7/91, JPO> <T3>
SCMAIN:
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
FMOVE.X FP0,FP1
FMUL.D TWOBYPI,FP1 ...X*2/PI
*--HIDE THE NEXT THREE INSTRUCTIONS
LEA PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32
*--FP1 IS NOW READY
; FMOVE.L FP1,NSIN(a6) ...CONVERT TO INTEGER <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
FMOVE.L FP1,NTRIG(a6) ; variable RENAMED <5/7/91, JPO> <T3>
; MOVE.L NSIN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
MOVE.L NTRIG(a6),D0 ; variable RENAMED <5/7/91, JPO> <T3>
ASL.L #4,D0
ADDA.L D0,A1 ...ADDRESS OF N*PIBY2, IN Y1, Y2
FSUB.X (A1)+,FP0 ...X-Y1
FSUB.S (A1),FP0 ...FP0 IS R = (X-Y1)-Y2
SCCONT:
*--continuation point from REDUCEX
*--HIDE THE NEXT TWO
; MOVE.L NSIN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
MOVE.L NTRIG(a6),D0 ; variable RENAMED <5/7/91, JPO> <T3>
ROR.L #1,D0
; CMPI.L #0,D0 ...D0 < 0 IFF N IS ODD - DELETED (unnecessary) <5/7/91, JPO> <T3>
; BGE.W NEVEN ; DELETED <5/7/91, JPO> <T3>
BPL.W NEVEN ; <5/7/91, JPO> <T3>
;NODD: ; label DELETED <1/7/91, JPO>
*--REGISTERS SAVED SO FAR: D0, A0, FP2.
FMOVE.X FP0,RPRIME(a6)
FMUL.X FP0,FP0 ...FP0 IS S = R*R
FMOVE.D SINA7,FP1 ...A7
FMOVE.D COSB8,FP2 ...B8
FMUL.X FP0,FP1 ...SA7
MOVE.L d2,-(A7)
MOVE.L D0,d2
FMUL.X FP0,FP2 ...SB8
ROR.L #1,d2
ANDI.L #$80000000,d2
FADD.D SINA6,FP1 ...A6+SA7
EOR.L D0,d2
ANDI.L #$80000000,d2
FADD.D COSB7,FP2 ...B7+SB8
FMUL.X FP0,FP1 ...S(A6+SA7)
EOR.L d2,RPRIME(a6)
MOVE.L (A7)+,d2
FMUL.X FP0,FP2 ...S(B7+SB8)
ROR.L #1,D0
ANDI.L #$80000000,D0
FADD.D SINA5,FP1 ...A5+S(A6+SA7)
MOVE.L #$3F800000,POSNEG1(a6)
EOR.L D0,POSNEG1(a6)
FADD.D COSB6,FP2 ...B6+S(B7+SB8)
FMUL.X FP0,FP1 ...S(A5+S(A6+SA7))
FMUL.X FP0,FP2 ...S(B6+S(B7+SB8))
FMOVE.X FP0,SPRIME(a6)
FADD.D SINA4,FP1 ...A4+S(A5+S(A6+SA7))
EOR.L D0,SPRIME(a6)
FADD.D COSB5,FP2 ...B5+S(B6+S(B7+SB8))
FMUL.X FP0,FP1 ...S(A4+...)
FMUL.X FP0,FP2 ...S(B5+...)
FADD.D SINA3,FP1 ...A3+S(A4+...)
FADD.D COSB4,FP2 ...B4+S(B5+...)
FMUL.X FP0,FP1 ...S(A3+...)
FMUL.X FP0,FP2 ...S(B4+...)
FADD.X SINA2,FP1 ...A2+S(A3+...)
FADD.X COSB3,FP2 ...B3+S(B4+...)
FMUL.X FP0,FP1 ...S(A2+...)
FMUL.X FP0,FP2 ...S(B3+...)
FADD.X SINA1,FP1 ...A1+S(A2+...)
FADD.X COSB2,FP2 ...B2+S(B3+...)
FMUL.X FP0,FP1 ...S(A1+...)
FMUL.X FP2,FP0 ...S(B2+...)
FMUL.X RPRIME(a6),FP1 ...R'S(A1+...)
FADD.S COSB1,FP0 ...B1+S(B2...)
FMUL.X SPRIME(a6),FP0 ...S'(B1+S(B2+...))
move.l d1,-(sp) ;restore users mode & precision
andi.l #$ff,d1 ;mask off all exceptions
fmove.l d1,FPCR
FADD.X RPRIME(a6),FP1 ...COS(X)
bsr sto_cos ;store cosine result
FMOVE.L (sp)+,FPCR ;restore users exceptions
FADD.S POSNEG1(a6),FP0 ...SIN(X)
bra t_frcinx
NEVEN:
*--REGISTERS SAVED SO FAR: FP2.
FMOVE.X FP0,RPRIME(a6)
FMUL.X FP0,FP0 ...FP0 IS S = R*R
FMOVE.D COSB8,FP1 ...B8
FMOVE.D SINA7,FP2 ...A7
FMUL.X FP0,FP1 ...SB8
FMOVE.X FP0,SPRIME(a6)
FMUL.X FP0,FP2 ...SA7
ROR.L #1,D0
ANDI.L #$80000000,D0
FADD.D COSB7,FP1 ...B7+SB8
FADD.D SINA6,FP2 ...A6+SA7
EOR.L D0,RPRIME(a6)
EOR.L D0,SPRIME(a6)
FMUL.X FP0,FP1 ...S(B7+SB8)
ORI.L #$3F800000,D0
MOVE.L D0,POSNEG1(a6)
FMUL.X FP0,FP2 ...S(A6+SA7)
FADD.D COSB6,FP1 ...B6+S(B7+SB8)
FADD.D SINA5,FP2 ...A5+S(A6+SA7)
FMUL.X FP0,FP1 ...S(B6+S(B7+SB8))
FMUL.X FP0,FP2 ...S(A5+S(A6+SA7))
FADD.D COSB5,FP1 ...B5+S(B6+S(B7+SB8))
FADD.D SINA4,FP2 ...A4+S(A5+S(A6+SA7))
FMUL.X FP0,FP1 ...S(B5+...)
FMUL.X FP0,FP2 ...S(A4+...)
FADD.D COSB4,FP1 ...B4+S(B5+...)
FADD.D SINA3,FP2 ...A3+S(A4+...)
FMUL.X FP0,FP1 ...S(B4+...)
FMUL.X FP0,FP2 ...S(A3+...)
FADD.X COSB3,FP1 ...B3+S(B4+...)
FADD.X SINA2,FP2 ...A2+S(A3+...)
FMUL.X FP0,FP1 ...S(B3+...)
FMUL.X FP0,FP2 ...S(A2+...)
FADD.X COSB2,FP1 ...B2+S(B3+...)
FADD.X SINA1,FP2 ...A1+S(A2+...)
FMUL.X FP0,FP1 ...S(B2+...)
fmul.x fp2,fp0 ...s(a1+...)
FADD.S COSB1,FP1 ...B1+S(B2...)
FMUL.X RPRIME(a6),FP0 ...R'S(A1+...)
FMUL.X SPRIME(a6),FP1 ...S'(B1+S(B2+...))
move.l d1,-(sp) ;save users mode & precision
andi.l #$ff,d1 ;mask off all exceptions
fmove.l d1,FPCR
FADD.S POSNEG1(a6),FP1 ...COS(X)
bsr.b sto_cos ;store cosine result
FMOVE.L (sp)+,FPCR ;restore users exceptions
FADD.X RPRIME(a6),FP0 ...SIN(X)
bra t_frcinx
;SCBORS: ; routine DELETED <5/7/91, JPO> <T3>
; CMPI.L #$3FFF8000,D0 ; <T3>
; BGT.W SREDUCEX ; label renamed <T3>
SCSM:
MOVE.W #$0000,XSINDC(a6) ; <1/7/91, JPO>
FMOVE.S #"$3F800000",FP1
move.l d1,-(sp) ;save users mode & precision
andi.l #$ff,d1 ;mask off all exceptions
fmove.l d1,FPCR
FSUB.S #"$00800000",FP1
bsr.b sto_cos ;store cosine result
FMOVE.L (sp)+,FPCR ;restore users exceptions
FMOVE.X XSIN(a6),FP0 ; <1/7/91, JPO>
bra t_frcinx
; sto_cos (from Motorola file 'sto_res.sa')
sto_cos:
bfextu CMDREG1B(a6){13:3},d0 ;extract cos destination
cmpi.b #3,d0 ;check for fp0/fp1 cases
ble.b c_fp0123
fmovem.x fp1,-(a7)
moveq.l #7,d1
sub.l d0,d1 ;d1 = 7- (dest. reg. no.)
clr.l d0
bset.l d1,d0 ;d0 is dynamic register mask
fmovem.x (a7)+,d0
rts
c_fp0123:
cmpi.b #0,d0
beq.b c_is_fp0
cmpi.b #1,d0
beq.b c_is_fp1
cmpi.b #2,d0
beq.b c_is_fp2
;c_is_fp3: ; label DELETED <1/7/91, JPO>
fmovem.x fp1,USER_FP3(a6)
rts
c_is_fp2:
fmovem.x fp1,USER_FP2(a6)
rts
c_is_fp1:
fmovem.x fp1,USER_FP1(a6)
rts
c_is_fp0:
fmovem.x fp1,USER_FP0(a6)
rts
; stan
; CHANGE LOG:
; 07 Jan 91 JPO Deleted constants BOUNDS1, TWOBYPI, INVTWOPI, TWOPI1, and
; TWOPI2. Moved constants and constant table PITBL to
; file 'constants.a'. Deleted variable equates for INARG,
; TWOTO63, and ENDFLAG (duplication). Renamed variable N
; to NTAN. Renamed label "RESTORE" to "TRESTORE".
; 07 May 91 JPO Deleted variable NTAN and changed all references to it to
; refer to NTRIG. Removed routine "TANBORS" (not referenced).
; Converted routine "REDUCEX" to a subroutine and all
; branches to it to subroutine calls. Added code in
; "REDUCEX" to do a single remainder step if the input
; is very large in order to prevent unwanted overflow.
;
*
* stan.sa 3.2 12/18/90
*
* The entry point stan computes the tangent of
* an input argument;
* stand does the same except for denormalized input.
*
* Input: Double-extended number X in location pointed to
* by address register a0.
*
* Output: The value tan(X) returned in floating-point register Fp0.
*
* Accuracy and Monotonicity: The returned result is within 3 ulp in
* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
* result is subsequently rounded to double precision. The
* result is provably monotonic in double precision.
*
* Speed: The program sTAN takes approximately 170 cycles for
* input argument X such that |X| < 15Pi, which is the the usual
* situation.
*
* Algorithm:
*
* 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
*
* 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
* k = N mod 2, so in particular, k = 0 or 1.
*
* 3. If k is odd, go to 5.
*
* 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
* rational function U/V where
* U = r + r*s*(P1 + s*(P2 + s*P3)), and
* V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r.
* Exit.
*
* 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
* rational function U/V where
* U = r + r*s*(P1 + s*(P2 + s*P3)), and
* V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
* -Cot(r) = -V/U. Exit.
*
* 6. If |X| > 1, go to 8.
*
* 7. (|X|<2**(-40)) Tan(X) = X. Exit.
*
* 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
*
* Copyright (C) Motorola, Inc. 1990
* All Rights Reserved
*
* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
* The copyright notice above does not evidence any
* actual or intended publication of such source code.
* STAN IDNT 2,1 Motorola 040 Floating Point Software Package
;INARG equ FP_SCR4 ; deleted <1/7/91, JPO>
;TWOTO63 equ L_SCR1 ; deleted <1/7/91, JPO>
;ENDFLAG equ L_SCR2 ; deleted <1/7/91, JPO>
;N equ L_SCR3 ; renamed <1/7/91, JPO>
;NTAN equ L_SCR3 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
stand:
*--TAN(X) = X FOR DENORMALIZED X
bra t_extdnrm
stan:
FMOVE.X (a0),FP0 ...LOAD INPUT
MOVE.L (A0),D0
MOVE.W 4(A0),D0
ANDI.L #$7FFFFFFF,D0
CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)?
BGE.B TANOK1
BRA.W TANSM
TANOK1:
CMPI.L #$4004BC7E,D0 ...|X| < 15 PI?
BLT.B TANMAIN
; BRA.W REDUCEX ; DELETED <5/7/91, JPO> <T3>
BSR.W REDUCEX ; NEW subroutine <5/7/91, JPO> <T3>
MOVE.L NTRIG(A6),D0 ; get N <5/7/91, JPO> <T3>
ROR.L #1,D0 ; rotate bit 0 into bit 31 <5/7/91, JPO> <T3>
BRA.B TANCONT ; continue below <T3>
TANMAIN:
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
FMOVE.X FP0,FP1
FMUL.D TWOBYPI,FP1 ...X*2/PI
*--HIDE THE NEXT TWO INSTRUCTIONS
lea.l PITBL+$200,a1 ...TABLE OF N*PI/2, N = -32,...,32
*--FP1 IS NOW READY
FMOVE.L FP1,D0 ...CONVERT TO INTEGER
ASL.L #4,D0
ADDA.L D0,a1 ...ADDRESS N*PIBY2 IN Y1, Y2
FSUB.X (a1)+,FP0 ...X-Y1
*--HIDE THE NEXT ONE
FSUB.S (a1),FP0 ...FP0 IS R = (X-Y1)-Y2
ROR.L #5,D0
ANDI.L #$80000000,D0 ...D0 WAS ODD IFF D0 < 0
TANCONT:
; CMPI.L #0,D0 ; DELETED (unnecessary) <5/7/91, JPO> <T3>
; BLT.B NODD ; DELETED <5/7/91, JPO> <T3>
BMI.B NODD ; <5/7/91, JPO> <T3>
FMOVE.X FP0,FP1
FMUL.X FP1,FP1 ...S = R*R
FMOVE.D TANQ4,FP3
FMOVE.D TANP3,FP2
FMUL.X FP1,FP3 ...SQ4
FMUL.X FP1,FP2 ...SP3
FADD.D TANQ3,FP3 ...Q3+SQ4
FADD.X TANP2,FP2 ...P2+SP3
FMUL.X FP1,FP3 ...S(Q3+SQ4)
FMUL.X FP1,FP2 ...S(P2+SP3)
FADD.X TANQ2,FP3 ...Q2+S(Q3+SQ4)
FADD.X TANP1,FP2 ...P1+S(P2+SP3)
FMUL.X FP1,FP3 ...S(Q2+S(Q3+SQ4))
FMUL.X FP1,FP2 ...S(P1+S(P2+SP3))
FADD.X TANQ1,FP3 ...Q1+S(Q2+S(Q3+SQ4))
FMUL.X FP0,FP2 ...RS(P1+S(P2+SP3))
FMUL.X FP3,FP1 ...S(Q1+S(Q2+S(Q3+SQ4)))
FADD.X FP2,FP0 ...R+RS(P1+S(P2+SP3))
FADD.S #"$3F800000",FP1 ...1+S(Q1+...)
FMOVE.L d1,fpcr ;restore users exceptions
FDIV.X FP1,FP0 ;last inst - possible exception set
bra t_frcinx
NODD:
FMOVE.X FP0,FP1
FMUL.X FP0,FP0 ...S = R*R
FMOVE.D TANQ4,FP3
FMOVE.D TANP3,FP2
FMUL.X FP0,FP3 ...SQ4
FMUL.X FP0,FP2 ...SP3
FADD.D TANQ3,FP3 ...Q3+SQ4
FADD.X TANP2,FP2 ...P2+SP3
FMUL.X FP0,FP3 ...S(Q3+SQ4)
FMUL.X FP0,FP2 ...S(P2+SP3)
FADD.X TANQ2,FP3 ...Q2+S(Q3+SQ4)
FADD.X TANP1,FP2 ...P1+S(P2+SP3)
FMUL.X FP0,FP3 ...S(Q2+S(Q3+SQ4))
FMUL.X FP0,FP2 ...S(P1+S(P2+SP3))
FADD.X TANQ1,FP3 ...Q1+S(Q2+S(Q3+SQ4))
FMUL.X FP1,FP2 ...RS(P1+S(P2+SP3))
FMUL.X FP3,FP0 ...S(Q1+S(Q2+S(Q3+SQ4)))
FADD.X FP2,FP1 ...R+RS(P1+S(P2+SP3))
FADD.S #"$3F800000",FP0 ...1+S(Q1+...)
FMOVE.X FP1,-(sp)
EORI.L #$80000000,(sp)
FMOVE.L d1,fpcr ;restore users exceptions
FDIV.X (sp)+,FP0 ;last inst - possible exception set
bra t_frcinx
;TANBORS: ; routine DELETED <5/7/91, JPO> <T3>
*--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
*--IF |X| < 2**(-40), RETURN X OR 1.
; CMPI.L #$3FFF8000,D0 <T3>
; BGT.B REDUCEX <T3>
TANSM:
FMOVE.X FP0,-(sp)
FMOVE.L d1,fpcr ;restore users exceptions
FMOVE.X (sp)+,FP0 ;last inst - posibble exception set
bra t_frcinx
; Routine "REDUCEX" converted to subroutine with additional code to do a <T3>
; single REMAINDER step via subtraction if abs(input) >= $7ffe 0000 ffff0000 00000000 <T3>
; <5/7/91, JPO>. <T3>
REDUCEX:
*--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
FMOVEM.X FP2-FP5,-(A7) ...save FP2 through FP5
MOVE.L D2,-(A7)
FMOVE.S #"$00000000",FP1 ; initial argument in FP0/FP1
; If compact form of abs(arg) in D0 = $7ffeffff, argument is so large <T3> thru next <T3>
; that there is danger of unwanted overflow in first loop iteration.
; In this case, reduce argument by one remainder step to make subsequent
; reduction safe <5/7/91, JPO>.
cmpi.l #$7ffeffff,d0 ; is argument dangerously large? <5/7/91, JPO>
bne.b LOOP ; no. <5/7/91, JPO>
move.l #$7ffe0000,FP_SCR2(a6) ; yes. create 2**16383*PI/2 at FP_SCR2 <5/7/91, JPO>
move.l #$c90fdaa2,FP_SCR2+4(a6)
clr.l FP_SCR2+8(a6)
ftest.x fp0 ; test sign of argument <5/7/91, JPO>
move.l #$7fdc0000,FP_SCR3(a6) ; create low half of 2**16383*PI/2
move.l #$85a308d3,FP_SCR3+4(a6) ; at FP_SCR3 <5/7/91, JPO>
clr.l FP_SCR3+8(a6)
fblt.w @1 ; negative arg <5/7/91, JPO>
or.w #$8000,FP_SCR2(a6) ; positive arg. negate FP_SCR2
or.w #$8000,FP_SCR3(a6) ; and FP_SCR3 <5/7/91, JPO>
@1:
fadd.x FP_SCR2(a6),fp0 ; high part of reduction <5/7/91, JPO>
fmove.x fp0,fp1 ; save high result in fp1 <5/7/91, JPO>
fadd.x FP_SCR3(a6),fp0 ; low part of reduction <5/7/91, JPO>
fsub.x fp0,fp1 ; determine tail of result <5/7/91, JPO>
fadd.x FP_SCR3(a6),fp1 ; fp0/fp1 are reduced argument <5/7/91, JPO> <T3>
*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
*--integer quotient will be stored in N
*--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
; On entry, D0 contains compact form of abs(input). Use this value
; to determine if argument is so large (D0 = $7ffeffff) that there
; is a danger of spurious overflow in reduction calculation. If so,
; reduce argument by one remainder step by subtracting 2**16383*PI/2
; from the argument and continuing the reduction normally.
LOOP:
FMOVE.X FP0,INARG(a6) ...+-2**K * F, 1 <= F < 2
MOVE.W INARG(a6),D0
MOVE.L D0,A1 ...save a copy of D0
ANDI.L #$00007FFF,D0
SUBI.L #$00003FFF,D0 ...D0 IS K
CMPI.L #28,D0
BLE.B LASTLOOP
;CONTLOOP: ; label not referenced <1/7/91, JPO>
; SUBI.L #28,D0 ...D0 IS L := K-28 - DELETED <6/13/91, JPO> <T4>
SUBI.L #27,D0 ...D0 IS L := K-27 - ADDED <6/13/91, JPO> <T4>
MOVE.L #0,ENDFLAG(a6)
BRA.B WORK
LASTLOOP:
CLR.L D0 ...D0 IS L := 0
MOVE.L #1,ENDFLAG(a6)
WORK:
*--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
*--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
*--2**L * (PIby2_1), 2**L * (PIby2_2)
MOVE.L #$00003FFE,D2 ...BIASED EXPO OF 2/PI
SUB.L D0,D2 ...BIASED EXPO OF 2**(-L)*(2/PI)
MOVE.L #$A2F9836E,FP_SCR1+4(a6)
MOVE.L #$4E44152A,FP_SCR1+8(a6)
MOVE.W D2,FP_SCR1(a6) ...FP_SCR1 is 2**(-L)*(2/PI)
FMOVE.X FP0,FP2
FMUL.X FP_SCR1(a6),FP2
*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
*--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
*--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
*--US THE DESIRED VALUE IN FLOATING POINT.
*--HIDE SIX CYCLES OF INSTRUCTION
MOVE.L A1,D2
SWAP D2
ANDI.L #$80000000,D2
ORI.L #$5F000000,D2 ...D2 IS SIGN(INARG)*2**63 IN SGL
MOVE.L D2,TWOTO63(a6)
MOVE.L D0,D2
ADDI.L #$00003FFF,D2 ...BIASED EXPO OF 2**L * (PI/2)
*--FP2 IS READY
FADD.S TWOTO63(a6),FP2 ...THE FRACTIONAL PART OF FP1 IS ROUNDED
*--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
MOVE.W D2,FP_SCR2(a6)
CLR.W FP_SCR2+2(a6)
MOVE.L #$C90FDAA2,FP_SCR2+4(a6)
CLR.L FP_SCR2+8(a6) ...FP_SCR2 is 2**(L) * Piby2_1
*--FP2 IS READY
FSUB.S TWOTO63(a6),FP2 ...FP2 is N
ADDI.L #$00003FDD,D0
MOVE.W D0,FP_SCR3(a6)
CLR.W FP_SCR3+2(a6)
MOVE.L #$85A308D3,FP_SCR3+4(a6)
CLR.L FP_SCR3+8(a6) ...FP_SCR3 is 2**(L) * Piby2_2
MOVE.L ENDFLAG(a6),D0
*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
*--P2 = 2**(L) * Piby2_2
FMOVE.X FP2,FP4
FMul.X FP_SCR2(a6),FP4 ...W = N*P1
FMove.X FP2,FP5
FMul.X FP_SCR3(a6),FP5 ...w = N*P2
FMove.X FP4,FP3
*--we want P+p = W+w but |p| <= half ulp of P
*--Then, we need to compute A := R-P and a := r-p
FAdd.X FP5,FP3 ...FP3 is P
FSub.X FP3,FP4 ...W-P
FSub.X FP3,FP0 ...FP0 is A := R - P
FAdd.X FP5,FP4 ...FP4 is p = (W-P)+w
FMove.X FP0,FP3 ...FP3 A
FSub.X FP4,FP1 ...FP1 is a := r - p
*--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
*--|r| <= half ulp of R.
FAdd.X FP1,FP0 ...FP0 is R := A+a
*--No need to calculate r if this is the last loop
; CMPI.L #0,D0 ; DELETED <5/7/91, JPO> <T3>
TST.L D0 ; <5/7/91, JPO> <T3>
BGT.B TRESTORE ; label renamed <1/7/91, JPO>
*--Need to calculate r
FSub.X FP0,FP3 ...A-R
FAdd.X FP3,FP1 ...FP1 is r := (A-R)+a
BRA.W LOOP
TRESTORE: ; label renamed <1/7/91, JPO>
; FMOVE.L FP2,NTAN(a6) ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
FMOVE.L FP2,NTRIG(a6) ; variable RENAMED <5/7/91, JPO> <T3>
MOVE.L (A7)+,D2
FMOVEM.X (A7)+,FP2-FP5
; MOVE.L NTAN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
; ROR.L #1,D0 ; DELETED from subroutine <5/7/91, JPO> <T3>
; BRA.W TANCONT ; DELETED from subroutine <5/7/91, JPO> <T3>
RTS ; return from subroutine <5/7/91, JPO> <T3>