mirror of
https://github.com/elliotnunn/supermario.git
synced 2024-11-26 01:49:19 +00:00
1188 lines
32 KiB
Plaintext
1188 lines
32 KiB
Plaintext
;
|
|
; File: Trig.a
|
|
;
|
|
; Contains: Routines to emulate trigonometric functions
|
|
;
|
|
; Originally Written by: Motorola Inc.
|
|
; Adapted to Apple/MPW: Jon Okada
|
|
;
|
|
; Copyright: © 1990, 1991 by Apple Computer, Inc., all rights reserved.
|
|
;
|
|
; This file is used in these builds: Mac32
|
|
;
|
|
; Change History (most recent first):
|
|
;
|
|
; <3+> 6/24/91 BG Modified trig argument reduction routine per Motorola
|
|
; version 2.0 to fix sign bug for FCOS of large argument
|
|
; (order of 10**8).
|
|
; <3> 5/24/91 BG Cleanup and optimization to the Sin and Tan routines.
|
|
; <2> 3/30/91 BG Rolling in Jon Okada's latest changes.
|
|
; <1> 12/14/90 BG First checked into TERROR/BBS.
|
|
|
|
; trig.a
|
|
|
|
; Based upon Motorola files 'ssin.sa', 'sto_res.sa', and 'stan.sa'.
|
|
|
|
|
|
; ssin
|
|
|
|
; CHANGE LOG:
|
|
; 07 Jan 91 JPO Deleted constants BOUNDS1, INVTWOPI, TWOPI1, and
|
|
; TWOPI2 (not referenced). Moved constants to
|
|
; file 'constants.a'. Deleted variable equate for
|
|
; XFRAC (not referenced). Renamed variable labels X,
|
|
; XDCARE, and N to XSIN, XSINDC, and NSIN, respectively.
|
|
; Deleted labels "NODD" and "c_is_fp3" (not referenced).
|
|
; Renamed labels REDUCEX, LOOP, WORK, LASTLOOP, and
|
|
; RESTORE to SREDUCEX, SLOOP, SWORK, SLASTLOOP, and
|
|
; SRESTORE, respectively. Appended routine "sto_cos"
|
|
; from Motorola file 'sto_res.sa'.
|
|
; 07 May 91 JPO Renamed variable NSIN to NTRIG. Deleted variable NTAN
|
|
; and changed all references to it to references to NTRIG.
|
|
; Removed routines "SINBORS", "SCBORS", and "TANBORS"
|
|
; (not referenced). Removed routine "SREDUCEX" and converted
|
|
; all branches to it to subroutine calls to "REDUCEX".
|
|
; Converted routine "REDUCEX" to a subroutine and all
|
|
; branches to it to subroutine calls. Added code in
|
|
; "REDUCEX" to do a single remainder step if the input
|
|
; is very large in order to prevent unwanted overflow.
|
|
; 13 Jun 91 JPO Modified trig argument reduction routine per Motorola
|
|
; version 2.0 to fix sign bug for FCOS of large argument
|
|
; (order of 10**8).
|
|
;
|
|
|
|
*
|
|
* ssin.sa 3.1 12/10/90
|
|
*
|
|
* The entry point sSIN computes the sine of an input argument
|
|
* sCOS computes the cosine, and sSINCOS computes both. The
|
|
* corresponding entry points with a "d" computes the same
|
|
* corresponding function values for denormalized inputs.
|
|
*
|
|
* Input: Double-extended number X in location pointed to
|
|
* by address register a0.
|
|
*
|
|
* Output: The funtion value sin(X) or cos(X) returned in Fp0 if SIN or
|
|
* COS is requested. Otherwise, for SINCOS, sin(X) is returned
|
|
* in Fp0, and cos(X) is returned in Fp1.
|
|
*
|
|
* Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
|
|
*
|
|
* Accuracy and Monotonicity: The returned result is within 1 ulp in
|
|
* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
|
|
* result is subsequently rounded to double precision. The
|
|
* result is provably monotonic in double precision.
|
|
*
|
|
* Speed: The programs sSIN and sCOS take approximately 150 cycles for
|
|
* input argument X such that |X| < 15Pi, which is the the usual
|
|
* situation. The speed for sSINCOS is approximately 190 cycles.
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* SIN and COS:
|
|
* 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
|
|
*
|
|
* 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
|
|
*
|
|
* 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
|
|
* k = N mod 4, so in particular, k = 0,1,2,or 3. Overwirte
|
|
* k by k := k + AdjN.
|
|
*
|
|
* 4. If k is even, go to 6.
|
|
*
|
|
* 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
|
|
* where cos(r) is approximated by an even polynomial in r,
|
|
* 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r.
|
|
* Exit.
|
|
*
|
|
* 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
|
|
* where sin(r) is approximated by an odd polynomial in r
|
|
* r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r.
|
|
* Exit.
|
|
*
|
|
* 7. If |X| > 1, go to 9.
|
|
*
|
|
* 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
|
|
*
|
|
* 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
|
|
*
|
|
* SINCOS:
|
|
* 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
|
|
*
|
|
* 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
|
|
* k = N mod 4, so in particular, k = 0,1,2,or 3.
|
|
*
|
|
* 3. If k is even, go to 5.
|
|
*
|
|
* 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
|
|
* j1 exclusive or with the l.s.b. of k.
|
|
* sgn1 := (-1)**j1, sgn2 := (-1)**j2.
|
|
* SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
|
|
* sin(r) and cos(r) are computed as odd and even polynomials
|
|
* in r, respectively. Exit
|
|
*
|
|
* 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
|
|
* SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
|
|
* sin(r) and cos(r) are computed as odd and even polynomials
|
|
* in r, respectively. Exit
|
|
*
|
|
* 6. If |X| > 1, go to 8.
|
|
*
|
|
* 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
|
|
*
|
|
* 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
|
|
*
|
|
|
|
* Copyright (C) Motorola, Inc. 1990
|
|
* All Rights Reserved
|
|
*
|
|
* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
|
|
* The copyright notice above does not evidence any
|
|
* actual or intended publication of such source code.
|
|
|
|
* SSIN IDNT 2,1 Motorola 040 Floating Point Software Package
|
|
|
|
|
|
INARG equ FP_SCR4
|
|
|
|
;X equ FP_SCR5 ; Renamed <1/7/91, JPO>
|
|
;XDCARE equ X+2 ; Renamed <1/7/91, JPO>
|
|
;XFRAC equ X+4 ; Deleted <1/7/91, JPO>
|
|
|
|
XSIN EQU FP_SCR5 ; <1/7/91, JPO>
|
|
XSINDC EQU XSIN+2
|
|
|
|
RPRIME equ FP_SCR1
|
|
SPRIME equ FP_SCR2
|
|
|
|
POSNEG1 equ L_SCR1
|
|
TWOTO63 equ L_SCR1
|
|
|
|
ENDFLAG equ L_SCR2
|
|
|
|
;N equ L_SCR2 ; Renamed <1/7/91, JPO>
|
|
|
|
;NSIN equ L_SCR2 ; <1/7/91, JPO> - RENAMED <T3>
|
|
NTRIG equ L_SCR2 ; <5/7/91, JPO> <T3>
|
|
|
|
ADJN equ L_SCR3
|
|
|
|
|
|
ssind:
|
|
*--SIN(X) = X FOR DENORMALIZED X
|
|
bra t_extdnrm
|
|
|
|
|
|
scosd:
|
|
*--COS(X) = 1 FOR DENORMALIZED X
|
|
|
|
FMOVE.S #"$3F800000",FP0
|
|
*
|
|
* 9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
|
|
*
|
|
fmove.l #0,fpsr
|
|
*
|
|
bra t_frcinx
|
|
|
|
|
|
ssin:
|
|
*--SET ADJN TO 0
|
|
MOVE.L #0,ADJN(a6)
|
|
BRA.B SINBGN
|
|
|
|
|
|
scos:
|
|
*--SET ADJN TO 1
|
|
MOVE.L #1,ADJN(a6)
|
|
|
|
SINBGN:
|
|
*--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
|
|
|
|
FMOVE.X (a0),FP0 ...LOAD INPUT
|
|
|
|
MOVE.L (A0),D0
|
|
MOVE.W 4(A0),D0
|
|
FMOVE.X FP0,XSIN(a6) ; <1/7/91, JPO>
|
|
ANDI.L #$7FFFFFFF,D0 ...COMPACTIFY X
|
|
|
|
CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)?
|
|
BGE.B SOK1
|
|
BRA.W SINSM
|
|
|
|
SOK1:
|
|
CMPI.L #$4004BC7E,D0 ...|X| < 15 PI?
|
|
BLT.B SINMAIN
|
|
; BRA.W SREDUCEX ; label renamed <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
BSR.W REDUCEX ; NEW subroutine <5/7/91, JPO> <T3>
|
|
BRA.B SINCONT
|
|
|
|
SINMAIN:
|
|
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
|
|
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
|
|
FMOVE.X FP0,FP1
|
|
FMUL.D TWOBYPI,FP1 ...X*2/PI
|
|
|
|
*--HIDE THE NEXT THREE INSTRUCTIONS
|
|
LEA PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32
|
|
|
|
|
|
*--FP1 IS NOW READY
|
|
; FMOVE.L FP1,NSIN(a6) ...CONVERT TO INTEGER <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
FMOVE.L FP1,NTRIG(a6) ; variable RENAMED <5/7/91, JPO> <T3>
|
|
|
|
; MOVE.L NSIN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
MOVE.L NTRIG(a6),D0 ; variable RENAMED <5/7/91, JPO> <T3>
|
|
|
|
ASL.L #4,D0
|
|
ADDA.L D0,A1 ...A1 IS THE ADDRESS OF N*PIBY2
|
|
* ...WHICH IS IN TWO PIECES Y1 & Y2
|
|
|
|
FSUB.X (A1)+,FP0 ...X-Y1
|
|
*--HIDE THE NEXT ONE
|
|
FSUB.S (A1),FP0 ...FP0 IS R = (X-Y1)-Y2
|
|
|
|
SINCONT:
|
|
*--continuation from REDUCEX
|
|
|
|
*--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
|
|
; MOVE.L NSIN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
MOVE.L NTRIG(a6),D0 ; variable RENAMED <5/7/91, JPO> <T3>
|
|
ADD.L ADJN(a6),D0 ...SEE IF D0 IS ODD OR EVEN
|
|
ROR.L #1,D0 ...D0 WAS ODD IFF D0 IS NEGATIVE
|
|
; CMPI.L #0,D0 ; DELETED (unnecessary) <5/7/91, JPO> <T3>
|
|
; BLT.B COSPOLY ; DELETED <5/7/91, JPO> <T3>
|
|
BMI.B COSPOLY ; <5/7/91, JPO> <T3>
|
|
|
|
SINPOLY:
|
|
*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
|
|
*--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
|
|
*--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
|
|
*--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
|
|
*--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
|
|
*--WHERE T=S*S.
|
|
*--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
|
|
*--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
|
|
FMOVE.X FP0,XSIN(a6) ...X IS R <1/7/91, JPO>
|
|
FMUL.X FP0,FP0 ...FP0 IS S
|
|
*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
|
|
FMOVE.D SINA7,FP3
|
|
FMOVE.D SINA6,FP2
|
|
*--FP0 IS NOW READY
|
|
FMOVE.X FP0,FP1
|
|
FMUL.X FP1,FP1 ...FP1 IS T
|
|
*--HIDE THE NEXT TWO WHILE WAITING FOR FP1
|
|
|
|
ROR.L #1,D0
|
|
ANDI.L #$80000000,D0
|
|
* ...LEAST SIG. BIT OF D0 IN SIGN POSITION
|
|
EOR.L D0,XSIN(a6) ...X IS NOW R'= SGN*R <1/7/91, JPO>
|
|
|
|
FMUL.X FP1,FP3 ...TA7
|
|
FMUL.X FP1,FP2 ...TA6
|
|
|
|
FADD.D SINA5,FP3 ...A5+TA7
|
|
FADD.D SINA4,FP2 ...A4+TA6
|
|
|
|
FMUL.X FP1,FP3 ...T(A5+TA7)
|
|
FMUL.X FP1,FP2 ...T(A4+TA6)
|
|
|
|
FADD.D SINA3,FP3 ...A3+T(A5+TA7)
|
|
FADD.X SINA2,FP2 ...A2+T(A4+TA6)
|
|
|
|
FMUL.X FP3,FP1 ...T(A3+T(A5+TA7))
|
|
|
|
FMUL.X FP0,FP2 ...S(A2+T(A4+TA6))
|
|
FADD.X SINA1,FP1 ...A1+T(A3+T(A5+TA7))
|
|
FMUL.X XSIN(a6),FP0 ...R'*S <1/7/91, JPO>
|
|
|
|
FADD.X FP2,FP1 ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
|
|
*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
|
|
*--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING
|
|
|
|
|
|
FMUL.X FP1,FP0 ...SIN(R')-R'
|
|
*--FP1 RELEASED.
|
|
|
|
FMOVE.L d1,FPCR ;restore users exceptions
|
|
FADD.X XSIN(a6),FP0 ;last inst - possible exception set <1/7/91, JPO>
|
|
bra t_frcinx
|
|
|
|
|
|
COSPOLY:
|
|
*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
|
|
*--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
|
|
*--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
|
|
*--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
|
|
*--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
|
|
*--WHERE T=S*S.
|
|
*--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
|
|
*--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
|
|
*--AND IS THEREFORE STORED AS SINGLE PRECISION.
|
|
|
|
FMUL.X FP0,FP0 ...FP0 IS S
|
|
*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
|
|
FMOVE.D COSB8,FP2
|
|
FMOVE.D COSB7,FP3
|
|
*--FP0 IS NOW READY
|
|
FMOVE.X FP0,FP1
|
|
FMUL.X FP1,FP1 ...FP1 IS T
|
|
*--HIDE THE NEXT TWO WHILE WAITING FOR FP1
|
|
FMOVE.X FP0,XSIN(a6) ...X IS S <1/7/91, JPO>
|
|
ROR.L #1,D0
|
|
ANDI.L #$80000000,D0
|
|
* ...LEAST SIG. BIT OF D0 IN SIGN POSITION
|
|
|
|
FMUL.X FP1,FP2 ...TB8
|
|
*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
|
|
EOR.L D0,XSIN(a6) ...X IS NOW S'= SGN*S <1/7/91, JPO>
|
|
ANDI.L #$80000000,D0
|
|
|
|
FMUL.X FP1,FP3 ...TB7
|
|
*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
|
|
ORI.L #$3F800000,D0 ...D0 IS SGN IN SINGLE
|
|
MOVE.L D0,POSNEG1(a6)
|
|
|
|
FADD.D COSB6,FP2 ...B6+TB8
|
|
FADD.D COSB5,FP3 ...B5+TB7
|
|
|
|
FMUL.X FP1,FP2 ...T(B6+TB8)
|
|
FMUL.X FP1,FP3 ...T(B5+TB7)
|
|
|
|
FADD.D COSB4,FP2 ...B4+T(B6+TB8)
|
|
FADD.X COSB3,FP3 ...B3+T(B5+TB7)
|
|
|
|
FMUL.X FP1,FP2 ...T(B4+T(B6+TB8))
|
|
FMUL.X FP3,FP1 ...T(B3+T(B5+TB7))
|
|
|
|
FADD.X COSB2,FP2 ...B2+T(B4+T(B6+TB8))
|
|
FADD.S COSB1,FP1 ...B1+T(B3+T(B5+TB7))
|
|
|
|
FMUL.X FP2,FP0 ...S(B2+T(B4+T(B6+TB8)))
|
|
*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
|
|
*--FP2 RELEASED.
|
|
|
|
|
|
FADD.X FP1,FP0
|
|
*--FP1 RELEASED
|
|
|
|
FMUL.X XSIN(a6),FP0 ; <1/7/91, JPO>
|
|
|
|
FMOVE.L d1,FPCR ;restore users exceptions
|
|
FADD.S POSNEG1(a6),FP0 ;last inst - possible exception set
|
|
bra t_frcinx
|
|
|
|
|
|
;SINBORS: ; routine DELETED - <5/7/91, JPO> <T3>
|
|
*--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
|
|
*--IF |X| < 2**(-40), RETURN X OR 1.
|
|
; CMPI.L #$3FFF8000,D0 <T3>
|
|
; BGT.B SREDUCEX ; label renamed <1/7/91, JPO> <T3>
|
|
|
|
|
|
SINSM:
|
|
MOVE.L ADJN(a6),D0
|
|
; CMPI.L #0,D0 ; DELETED (unnecessary) <5/7/91, JPO> <T3>
|
|
BGT.B COSTINY
|
|
|
|
SINTINY:
|
|
MOVE.W #$0000,XSINDC(a6) ...JUST IN CASE <1/7/91, JPO>
|
|
FMOVE.L d1,FPCR ;restore users exceptions
|
|
FMOVE.X XSIN(a6),FP0 ;last inst - possible exception set <1/7/91, JPO>
|
|
bra t_frcinx
|
|
|
|
|
|
COSTINY:
|
|
FMOVE.S #"$3F800000",FP0
|
|
|
|
FMOVE.L d1,FPCR ;restore users exceptions
|
|
FSUB.S #"$00800000",FP0 ;last inst - possible exception set
|
|
bra t_frcinx
|
|
|
|
|
|
; Routine "SREDUCEX" is DELETED and replaced by subroutine "REDUCEX" <T3> thru next <T3>
|
|
; (below) <5/7/91, JPO>.
|
|
|
|
;SREDUCEX: ; label renamed <1/7/91, JPO>
|
|
*--WHEN SREDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
|
|
*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
|
|
*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
|
|
|
|
; FMOVEM.X FP2-FP5,-(A7) ...save FP2 through FP5
|
|
; MOVE.L D2,-(A7)
|
|
; FMOVE.S #"$00000000",FP1
|
|
|
|
*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
|
|
*--integer quotient will be stored in N
|
|
*--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
|
|
|
|
;SLOOP: ; label renamed <1/7/91, JPO>
|
|
; FMOVE.X FP0,INARG(a6) ...+-2**K * F, 1 <= F < 2
|
|
; MOVE.W INARG(a6),D0
|
|
; MOVE.L D0,A1 ...save a copy of D0
|
|
; ANDI.L #$00007FFF,D0
|
|
; SUBI.L #$00003FFF,D0 ...D0 IS K
|
|
; CMPI.L #28,D0
|
|
; BLE.B SLASTLOOP ; label renamed <1/7/91, JPO>
|
|
;CONTLOOP: ; label not referenced <1/7/91, JPO>
|
|
; SUBI.L #28,D0 ...D0 IS L := K-28
|
|
; MOVE.L #0,ENDFLAG(a6)
|
|
; BRA.B SWORK ; label renamed <1/7/91, JPO>
|
|
;SLASTLOOP: ; label renamed <1/7/91, JPO>
|
|
; CLR.L D0 ...D0 IS L := 0
|
|
; MOVE.L #1,ENDFLAG(a6)
|
|
|
|
;SWORK: ; label renamed <1/7/91, JPO>
|
|
*--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
|
|
*--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
|
|
|
|
*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
|
|
*--2**L * (PIby2_1), 2**L * (PIby2_2)
|
|
|
|
; MOVE.L #$00003FFE,D2 ...BIASED EXPO OF 2/PI
|
|
; SUB.L D0,D2 ...BIASED EXPO OF 2**(-L)*(2/PI)
|
|
|
|
; MOVE.L #$A2F9836E,FP_SCR1+4(a6)
|
|
; MOVE.L #$4E44152A,FP_SCR1+8(a6)
|
|
; MOVE.W D2,FP_SCR1(a6) ...FP_SCR1 is 2**(-L)*(2/PI)
|
|
|
|
; FMOVE.X FP0,FP2
|
|
; FMUL.X FP_SCR1(a6),FP2
|
|
*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
|
|
*--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
|
|
*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
|
|
*--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
|
|
*--US THE DESIRED VALUE IN FLOATING POINT.
|
|
|
|
*--HIDE SIX CYCLES OF INSTRUCTION
|
|
; MOVE.L A1,D2
|
|
; SWAP D2
|
|
; ANDI.L #$80000000,D2
|
|
; ORI.L #$5F000000,D2 ...D2 IS SIGN(INARG)*2**63 IN SGL
|
|
; MOVE.L D2,TWOTO63(a6)
|
|
|
|
; MOVE.L D0,D2
|
|
; ADDI.L #$00003FFF,D2 ...BIASED EXPO OF 2**L * (PI/2)
|
|
|
|
*--FP2 IS READY
|
|
; FADD.S TWOTO63(a6),FP2 ...THE FRACTIONAL PART OF FP1 IS ROUNDED
|
|
|
|
*--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
|
|
; MOVE.W D2,FP_SCR2(a6)
|
|
; CLR.W FP_SCR2+2(a6)
|
|
; MOVE.L #$C90FDAA2,FP_SCR2+4(a6)
|
|
; CLR.L FP_SCR2+8(a6) ...FP_SCR2 is 2**(L) * Piby2_1
|
|
|
|
*--FP2 IS READY
|
|
; FSUB.S TWOTO63(a6),FP2 ...FP2 is N
|
|
|
|
; ADDI.L #$00003FDD,D0
|
|
; MOVE.W D0,FP_SCR3(a6)
|
|
; CLR.W FP_SCR3+2(a6)
|
|
; MOVE.L #$85A308D3,FP_SCR3+4(a6)
|
|
; CLR.L FP_SCR3+8(a6) ...FP_SCR3 is 2**(L) * Piby2_2
|
|
|
|
; MOVE.L ENDFLAG(a6),D0
|
|
|
|
*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
|
|
*--P2 = 2**(L) * Piby2_2
|
|
; FMOVE.X FP2,FP4
|
|
; FMul.X FP_SCR2(a6),FP4 ...W = N*P1
|
|
; FMove.X FP2,FP5
|
|
; FMul.X FP_SCR3(a6),FP5 ...w = N*P2
|
|
; FMove.X FP4,FP3
|
|
*--we want P+p = W+w but |p| <= half ulp of P
|
|
*--Then, we need to compute A := R-P and a := r-p
|
|
; FAdd.X FP5,FP3 ...FP3 is P
|
|
; FSub.X FP3,FP4 ...W-P
|
|
|
|
; FSub.X FP3,FP0 ...FP0 is A := R - P
|
|
; FAdd.X FP5,FP4 ...FP4 is p = (W-P)+w
|
|
|
|
; FMove.X FP0,FP3 ...FP3 A
|
|
; FSub.X FP4,FP1 ...FP1 is a := r - p
|
|
|
|
*--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
|
|
*--|r| <= half ulp of R.
|
|
; FAdd.X FP1,FP0 ...FP0 is R := A+a
|
|
*--No need to calculate r if this is the last loop
|
|
; CMPI.L #0,D0
|
|
; BGT.B SRESTORE ; label renamed <1/7/91, JPO>
|
|
|
|
*--Need to calculate r
|
|
; FSub.X FP0,FP3 ...A-R
|
|
; FAdd.X FP3,FP1 ...FP1 is r := (A-R)+a
|
|
; BRA.W SLOOP
|
|
|
|
;SRESTORE: ; label renamed <1/7/91, JPO>
|
|
; FMOVE.L FP2,NSIN(a6)
|
|
; MOVE.L (A7)+,D2
|
|
; FMOVEM.X (A7)+,FP2-FP5
|
|
|
|
|
|
; MOVE.L ADJN(a6),D0
|
|
; CMPI.L #4,D0
|
|
|
|
; BLT.W SINCONT
|
|
; BRA.B SCCONT <T3>
|
|
|
|
|
|
ssincosd:
|
|
*--SIN AND COS OF X FOR DENORMALIZED X
|
|
|
|
FMOVE.S #"$3F800000",FP1
|
|
bsr sto_cos ;store cosine result
|
|
bra t_extdnrm
|
|
|
|
|
|
ssincos:
|
|
*--SET ADJN TO 4
|
|
MOVE.L #4,ADJN(a6)
|
|
|
|
FMOVE.X (a0),FP0 ...LOAD INPUT
|
|
|
|
MOVE.L (A0),D0
|
|
MOVE.W 4(A0),D0
|
|
FMOVE.X FP0,XSIN(a6) ; <1/7/91, JPO>
|
|
ANDI.L #$7FFFFFFF,D0 ...COMPACTIFY X
|
|
|
|
CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)?
|
|
BGE.B SCOK1
|
|
BRA.W SCSM
|
|
|
|
SCOK1:
|
|
CMPI.L #$4004BC7E,D0 ...|X| < 15 PI?
|
|
BLT.B SCMAIN
|
|
; BRA.W SREDUCEX ; label renamed - DELETED <5/7/91, JPO> <T3>
|
|
BSR.W REDUCEX ; NEW subroutine <5/7/91, JPO> <T3>
|
|
BRA.B SCCONT ; continue below <5/7/91, JPO> <T3>
|
|
|
|
|
|
SCMAIN:
|
|
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
|
|
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
|
|
FMOVE.X FP0,FP1
|
|
FMUL.D TWOBYPI,FP1 ...X*2/PI
|
|
|
|
*--HIDE THE NEXT THREE INSTRUCTIONS
|
|
LEA PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32
|
|
|
|
|
|
*--FP1 IS NOW READY
|
|
; FMOVE.L FP1,NSIN(a6) ...CONVERT TO INTEGER <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
FMOVE.L FP1,NTRIG(a6) ; variable RENAMED <5/7/91, JPO> <T3>
|
|
|
|
; MOVE.L NSIN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
MOVE.L NTRIG(a6),D0 ; variable RENAMED <5/7/91, JPO> <T3>
|
|
ASL.L #4,D0
|
|
ADDA.L D0,A1 ...ADDRESS OF N*PIBY2, IN Y1, Y2
|
|
|
|
FSUB.X (A1)+,FP0 ...X-Y1
|
|
FSUB.S (A1),FP0 ...FP0 IS R = (X-Y1)-Y2
|
|
|
|
SCCONT:
|
|
*--continuation point from REDUCEX
|
|
|
|
*--HIDE THE NEXT TWO
|
|
; MOVE.L NSIN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
MOVE.L NTRIG(a6),D0 ; variable RENAMED <5/7/91, JPO> <T3>
|
|
ROR.L #1,D0
|
|
|
|
; CMPI.L #0,D0 ...D0 < 0 IFF N IS ODD - DELETED (unnecessary) <5/7/91, JPO> <T3>
|
|
; BGE.W NEVEN ; DELETED <5/7/91, JPO> <T3>
|
|
BPL.W NEVEN ; <5/7/91, JPO> <T3>
|
|
|
|
;NODD: ; label DELETED <1/7/91, JPO>
|
|
*--REGISTERS SAVED SO FAR: D0, A0, FP2.
|
|
|
|
FMOVE.X FP0,RPRIME(a6)
|
|
FMUL.X FP0,FP0 ...FP0 IS S = R*R
|
|
FMOVE.D SINA7,FP1 ...A7
|
|
FMOVE.D COSB8,FP2 ...B8
|
|
FMUL.X FP0,FP1 ...SA7
|
|
MOVE.L d2,-(A7)
|
|
MOVE.L D0,d2
|
|
FMUL.X FP0,FP2 ...SB8
|
|
ROR.L #1,d2
|
|
ANDI.L #$80000000,d2
|
|
|
|
FADD.D SINA6,FP1 ...A6+SA7
|
|
EOR.L D0,d2
|
|
ANDI.L #$80000000,d2
|
|
FADD.D COSB7,FP2 ...B7+SB8
|
|
|
|
FMUL.X FP0,FP1 ...S(A6+SA7)
|
|
EOR.L d2,RPRIME(a6)
|
|
MOVE.L (A7)+,d2
|
|
FMUL.X FP0,FP2 ...S(B7+SB8)
|
|
ROR.L #1,D0
|
|
ANDI.L #$80000000,D0
|
|
|
|
FADD.D SINA5,FP1 ...A5+S(A6+SA7)
|
|
MOVE.L #$3F800000,POSNEG1(a6)
|
|
EOR.L D0,POSNEG1(a6)
|
|
FADD.D COSB6,FP2 ...B6+S(B7+SB8)
|
|
|
|
FMUL.X FP0,FP1 ...S(A5+S(A6+SA7))
|
|
FMUL.X FP0,FP2 ...S(B6+S(B7+SB8))
|
|
FMOVE.X FP0,SPRIME(a6)
|
|
|
|
FADD.D SINA4,FP1 ...A4+S(A5+S(A6+SA7))
|
|
EOR.L D0,SPRIME(a6)
|
|
FADD.D COSB5,FP2 ...B5+S(B6+S(B7+SB8))
|
|
|
|
FMUL.X FP0,FP1 ...S(A4+...)
|
|
FMUL.X FP0,FP2 ...S(B5+...)
|
|
|
|
FADD.D SINA3,FP1 ...A3+S(A4+...)
|
|
FADD.D COSB4,FP2 ...B4+S(B5+...)
|
|
|
|
FMUL.X FP0,FP1 ...S(A3+...)
|
|
FMUL.X FP0,FP2 ...S(B4+...)
|
|
|
|
FADD.X SINA2,FP1 ...A2+S(A3+...)
|
|
FADD.X COSB3,FP2 ...B3+S(B4+...)
|
|
|
|
FMUL.X FP0,FP1 ...S(A2+...)
|
|
FMUL.X FP0,FP2 ...S(B3+...)
|
|
|
|
FADD.X SINA1,FP1 ...A1+S(A2+...)
|
|
FADD.X COSB2,FP2 ...B2+S(B3+...)
|
|
|
|
FMUL.X FP0,FP1 ...S(A1+...)
|
|
FMUL.X FP2,FP0 ...S(B2+...)
|
|
|
|
|
|
|
|
FMUL.X RPRIME(a6),FP1 ...R'S(A1+...)
|
|
FADD.S COSB1,FP0 ...B1+S(B2...)
|
|
FMUL.X SPRIME(a6),FP0 ...S'(B1+S(B2+...))
|
|
|
|
move.l d1,-(sp) ;restore users mode & precision
|
|
andi.l #$ff,d1 ;mask off all exceptions
|
|
fmove.l d1,FPCR
|
|
FADD.X RPRIME(a6),FP1 ...COS(X)
|
|
bsr sto_cos ;store cosine result
|
|
FMOVE.L (sp)+,FPCR ;restore users exceptions
|
|
FADD.S POSNEG1(a6),FP0 ...SIN(X)
|
|
|
|
bra t_frcinx
|
|
|
|
|
|
NEVEN:
|
|
*--REGISTERS SAVED SO FAR: FP2.
|
|
|
|
FMOVE.X FP0,RPRIME(a6)
|
|
FMUL.X FP0,FP0 ...FP0 IS S = R*R
|
|
FMOVE.D COSB8,FP1 ...B8
|
|
FMOVE.D SINA7,FP2 ...A7
|
|
FMUL.X FP0,FP1 ...SB8
|
|
FMOVE.X FP0,SPRIME(a6)
|
|
FMUL.X FP0,FP2 ...SA7
|
|
ROR.L #1,D0
|
|
ANDI.L #$80000000,D0
|
|
FADD.D COSB7,FP1 ...B7+SB8
|
|
FADD.D SINA6,FP2 ...A6+SA7
|
|
EOR.L D0,RPRIME(a6)
|
|
EOR.L D0,SPRIME(a6)
|
|
FMUL.X FP0,FP1 ...S(B7+SB8)
|
|
ORI.L #$3F800000,D0
|
|
MOVE.L D0,POSNEG1(a6)
|
|
FMUL.X FP0,FP2 ...S(A6+SA7)
|
|
|
|
FADD.D COSB6,FP1 ...B6+S(B7+SB8)
|
|
FADD.D SINA5,FP2 ...A5+S(A6+SA7)
|
|
|
|
FMUL.X FP0,FP1 ...S(B6+S(B7+SB8))
|
|
FMUL.X FP0,FP2 ...S(A5+S(A6+SA7))
|
|
|
|
FADD.D COSB5,FP1 ...B5+S(B6+S(B7+SB8))
|
|
FADD.D SINA4,FP2 ...A4+S(A5+S(A6+SA7))
|
|
|
|
FMUL.X FP0,FP1 ...S(B5+...)
|
|
FMUL.X FP0,FP2 ...S(A4+...)
|
|
|
|
FADD.D COSB4,FP1 ...B4+S(B5+...)
|
|
FADD.D SINA3,FP2 ...A3+S(A4+...)
|
|
|
|
FMUL.X FP0,FP1 ...S(B4+...)
|
|
FMUL.X FP0,FP2 ...S(A3+...)
|
|
|
|
FADD.X COSB3,FP1 ...B3+S(B4+...)
|
|
FADD.X SINA2,FP2 ...A2+S(A3+...)
|
|
|
|
FMUL.X FP0,FP1 ...S(B3+...)
|
|
FMUL.X FP0,FP2 ...S(A2+...)
|
|
|
|
FADD.X COSB2,FP1 ...B2+S(B3+...)
|
|
FADD.X SINA1,FP2 ...A1+S(A2+...)
|
|
|
|
FMUL.X FP0,FP1 ...S(B2+...)
|
|
fmul.x fp2,fp0 ...s(a1+...)
|
|
|
|
|
|
|
|
FADD.S COSB1,FP1 ...B1+S(B2...)
|
|
FMUL.X RPRIME(a6),FP0 ...R'S(A1+...)
|
|
FMUL.X SPRIME(a6),FP1 ...S'(B1+S(B2+...))
|
|
|
|
move.l d1,-(sp) ;save users mode & precision
|
|
andi.l #$ff,d1 ;mask off all exceptions
|
|
fmove.l d1,FPCR
|
|
FADD.S POSNEG1(a6),FP1 ...COS(X)
|
|
bsr.b sto_cos ;store cosine result
|
|
FMOVE.L (sp)+,FPCR ;restore users exceptions
|
|
FADD.X RPRIME(a6),FP0 ...SIN(X)
|
|
|
|
bra t_frcinx
|
|
|
|
;SCBORS: ; routine DELETED <5/7/91, JPO> <T3>
|
|
; CMPI.L #$3FFF8000,D0 ; <T3>
|
|
; BGT.W SREDUCEX ; label renamed <T3>
|
|
|
|
|
|
SCSM:
|
|
MOVE.W #$0000,XSINDC(a6) ; <1/7/91, JPO>
|
|
FMOVE.S #"$3F800000",FP1
|
|
|
|
move.l d1,-(sp) ;save users mode & precision
|
|
andi.l #$ff,d1 ;mask off all exceptions
|
|
fmove.l d1,FPCR
|
|
FSUB.S #"$00800000",FP1
|
|
bsr.b sto_cos ;store cosine result
|
|
FMOVE.L (sp)+,FPCR ;restore users exceptions
|
|
FMOVE.X XSIN(a6),FP0 ; <1/7/91, JPO>
|
|
bra t_frcinx
|
|
|
|
|
|
; sto_cos (from Motorola file 'sto_res.sa')
|
|
sto_cos:
|
|
bfextu CMDREG1B(a6){13:3},d0 ;extract cos destination
|
|
cmpi.b #3,d0 ;check for fp0/fp1 cases
|
|
ble.b c_fp0123
|
|
fmovem.x fp1,-(a7)
|
|
moveq.l #7,d1
|
|
sub.l d0,d1 ;d1 = 7- (dest. reg. no.)
|
|
clr.l d0
|
|
bset.l d1,d0 ;d0 is dynamic register mask
|
|
fmovem.x (a7)+,d0
|
|
rts
|
|
c_fp0123:
|
|
cmpi.b #0,d0
|
|
beq.b c_is_fp0
|
|
cmpi.b #1,d0
|
|
beq.b c_is_fp1
|
|
cmpi.b #2,d0
|
|
beq.b c_is_fp2
|
|
;c_is_fp3: ; label DELETED <1/7/91, JPO>
|
|
fmovem.x fp1,USER_FP3(a6)
|
|
rts
|
|
c_is_fp2:
|
|
fmovem.x fp1,USER_FP2(a6)
|
|
rts
|
|
c_is_fp1:
|
|
fmovem.x fp1,USER_FP1(a6)
|
|
rts
|
|
c_is_fp0:
|
|
fmovem.x fp1,USER_FP0(a6)
|
|
rts
|
|
|
|
|
|
|
|
|
|
; stan
|
|
|
|
; CHANGE LOG:
|
|
; 07 Jan 91 JPO Deleted constants BOUNDS1, TWOBYPI, INVTWOPI, TWOPI1, and
|
|
; TWOPI2. Moved constants and constant table PITBL to
|
|
; file 'constants.a'. Deleted variable equates for INARG,
|
|
; TWOTO63, and ENDFLAG (duplication). Renamed variable N
|
|
; to NTAN. Renamed label "RESTORE" to "TRESTORE".
|
|
; 07 May 91 JPO Deleted variable NTAN and changed all references to it to
|
|
; refer to NTRIG. Removed routine "TANBORS" (not referenced).
|
|
; Converted routine "REDUCEX" to a subroutine and all
|
|
; branches to it to subroutine calls. Added code in
|
|
; "REDUCEX" to do a single remainder step if the input
|
|
; is very large in order to prevent unwanted overflow.
|
|
;
|
|
|
|
*
|
|
* stan.sa 3.2 12/18/90
|
|
*
|
|
* The entry point stan computes the tangent of
|
|
* an input argument;
|
|
* stand does the same except for denormalized input.
|
|
*
|
|
* Input: Double-extended number X in location pointed to
|
|
* by address register a0.
|
|
*
|
|
* Output: The value tan(X) returned in floating-point register Fp0.
|
|
*
|
|
* Accuracy and Monotonicity: The returned result is within 3 ulp in
|
|
* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
|
|
* result is subsequently rounded to double precision. The
|
|
* result is provably monotonic in double precision.
|
|
*
|
|
* Speed: The program sTAN takes approximately 170 cycles for
|
|
* input argument X such that |X| < 15Pi, which is the the usual
|
|
* situation.
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
|
|
*
|
|
* 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
|
|
* k = N mod 2, so in particular, k = 0 or 1.
|
|
*
|
|
* 3. If k is odd, go to 5.
|
|
*
|
|
* 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
|
|
* rational function U/V where
|
|
* U = r + r*s*(P1 + s*(P2 + s*P3)), and
|
|
* V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r.
|
|
* Exit.
|
|
*
|
|
* 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
|
|
* rational function U/V where
|
|
* U = r + r*s*(P1 + s*(P2 + s*P3)), and
|
|
* V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
|
|
* -Cot(r) = -V/U. Exit.
|
|
*
|
|
* 6. If |X| > 1, go to 8.
|
|
*
|
|
* 7. (|X|<2**(-40)) Tan(X) = X. Exit.
|
|
*
|
|
* 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
|
|
*
|
|
|
|
* Copyright (C) Motorola, Inc. 1990
|
|
* All Rights Reserved
|
|
*
|
|
* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
|
|
* The copyright notice above does not evidence any
|
|
* actual or intended publication of such source code.
|
|
|
|
* STAN IDNT 2,1 Motorola 040 Floating Point Software Package
|
|
|
|
|
|
|
|
;INARG equ FP_SCR4 ; deleted <1/7/91, JPO>
|
|
|
|
;TWOTO63 equ L_SCR1 ; deleted <1/7/91, JPO>
|
|
;ENDFLAG equ L_SCR2 ; deleted <1/7/91, JPO>
|
|
|
|
;N equ L_SCR3 ; renamed <1/7/91, JPO>
|
|
;NTAN equ L_SCR3 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
|
|
|
|
stand:
|
|
*--TAN(X) = X FOR DENORMALIZED X
|
|
|
|
bra t_extdnrm
|
|
|
|
|
|
stan:
|
|
FMOVE.X (a0),FP0 ...LOAD INPUT
|
|
|
|
MOVE.L (A0),D0
|
|
MOVE.W 4(A0),D0
|
|
ANDI.L #$7FFFFFFF,D0
|
|
|
|
CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)?
|
|
BGE.B TANOK1
|
|
BRA.W TANSM
|
|
TANOK1:
|
|
CMPI.L #$4004BC7E,D0 ...|X| < 15 PI?
|
|
BLT.B TANMAIN
|
|
; BRA.W REDUCEX ; DELETED <5/7/91, JPO> <T3>
|
|
BSR.W REDUCEX ; NEW subroutine <5/7/91, JPO> <T3>
|
|
MOVE.L NTRIG(A6),D0 ; get N <5/7/91, JPO> <T3>
|
|
ROR.L #1,D0 ; rotate bit 0 into bit 31 <5/7/91, JPO> <T3>
|
|
BRA.B TANCONT ; continue below <T3>
|
|
|
|
TANMAIN:
|
|
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
|
|
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
|
|
FMOVE.X FP0,FP1
|
|
FMUL.D TWOBYPI,FP1 ...X*2/PI
|
|
|
|
*--HIDE THE NEXT TWO INSTRUCTIONS
|
|
lea.l PITBL+$200,a1 ...TABLE OF N*PI/2, N = -32,...,32
|
|
|
|
*--FP1 IS NOW READY
|
|
FMOVE.L FP1,D0 ...CONVERT TO INTEGER
|
|
|
|
ASL.L #4,D0
|
|
ADDA.L D0,a1 ...ADDRESS N*PIBY2 IN Y1, Y2
|
|
|
|
FSUB.X (a1)+,FP0 ...X-Y1
|
|
*--HIDE THE NEXT ONE
|
|
|
|
FSUB.S (a1),FP0 ...FP0 IS R = (X-Y1)-Y2
|
|
|
|
ROR.L #5,D0
|
|
ANDI.L #$80000000,D0 ...D0 WAS ODD IFF D0 < 0
|
|
|
|
TANCONT:
|
|
|
|
; CMPI.L #0,D0 ; DELETED (unnecessary) <5/7/91, JPO> <T3>
|
|
; BLT.B NODD ; DELETED <5/7/91, JPO> <T3>
|
|
BMI.B NODD ; <5/7/91, JPO> <T3>
|
|
|
|
FMOVE.X FP0,FP1
|
|
FMUL.X FP1,FP1 ...S = R*R
|
|
|
|
FMOVE.D TANQ4,FP3
|
|
FMOVE.D TANP3,FP2
|
|
|
|
FMUL.X FP1,FP3 ...SQ4
|
|
FMUL.X FP1,FP2 ...SP3
|
|
|
|
FADD.D TANQ3,FP3 ...Q3+SQ4
|
|
FADD.X TANP2,FP2 ...P2+SP3
|
|
|
|
FMUL.X FP1,FP3 ...S(Q3+SQ4)
|
|
FMUL.X FP1,FP2 ...S(P2+SP3)
|
|
|
|
FADD.X TANQ2,FP3 ...Q2+S(Q3+SQ4)
|
|
FADD.X TANP1,FP2 ...P1+S(P2+SP3)
|
|
|
|
FMUL.X FP1,FP3 ...S(Q2+S(Q3+SQ4))
|
|
FMUL.X FP1,FP2 ...S(P1+S(P2+SP3))
|
|
|
|
FADD.X TANQ1,FP3 ...Q1+S(Q2+S(Q3+SQ4))
|
|
FMUL.X FP0,FP2 ...RS(P1+S(P2+SP3))
|
|
|
|
FMUL.X FP3,FP1 ...S(Q1+S(Q2+S(Q3+SQ4)))
|
|
|
|
|
|
FADD.X FP2,FP0 ...R+RS(P1+S(P2+SP3))
|
|
|
|
|
|
FADD.S #"$3F800000",FP1 ...1+S(Q1+...)
|
|
|
|
FMOVE.L d1,fpcr ;restore users exceptions
|
|
FDIV.X FP1,FP0 ;last inst - possible exception set
|
|
|
|
bra t_frcinx
|
|
|
|
NODD:
|
|
FMOVE.X FP0,FP1
|
|
FMUL.X FP0,FP0 ...S = R*R
|
|
|
|
FMOVE.D TANQ4,FP3
|
|
FMOVE.D TANP3,FP2
|
|
|
|
FMUL.X FP0,FP3 ...SQ4
|
|
FMUL.X FP0,FP2 ...SP3
|
|
|
|
FADD.D TANQ3,FP3 ...Q3+SQ4
|
|
FADD.X TANP2,FP2 ...P2+SP3
|
|
|
|
FMUL.X FP0,FP3 ...S(Q3+SQ4)
|
|
FMUL.X FP0,FP2 ...S(P2+SP3)
|
|
|
|
FADD.X TANQ2,FP3 ...Q2+S(Q3+SQ4)
|
|
FADD.X TANP1,FP2 ...P1+S(P2+SP3)
|
|
|
|
FMUL.X FP0,FP3 ...S(Q2+S(Q3+SQ4))
|
|
FMUL.X FP0,FP2 ...S(P1+S(P2+SP3))
|
|
|
|
FADD.X TANQ1,FP3 ...Q1+S(Q2+S(Q3+SQ4))
|
|
FMUL.X FP1,FP2 ...RS(P1+S(P2+SP3))
|
|
|
|
FMUL.X FP3,FP0 ...S(Q1+S(Q2+S(Q3+SQ4)))
|
|
|
|
|
|
FADD.X FP2,FP1 ...R+RS(P1+S(P2+SP3))
|
|
FADD.S #"$3F800000",FP0 ...1+S(Q1+...)
|
|
|
|
|
|
FMOVE.X FP1,-(sp)
|
|
EORI.L #$80000000,(sp)
|
|
|
|
FMOVE.L d1,fpcr ;restore users exceptions
|
|
FDIV.X (sp)+,FP0 ;last inst - possible exception set
|
|
|
|
bra t_frcinx
|
|
|
|
;TANBORS: ; routine DELETED <5/7/91, JPO> <T3>
|
|
*--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
|
|
*--IF |X| < 2**(-40), RETURN X OR 1.
|
|
; CMPI.L #$3FFF8000,D0 <T3>
|
|
; BGT.B REDUCEX <T3>
|
|
|
|
TANSM:
|
|
|
|
FMOVE.X FP0,-(sp)
|
|
FMOVE.L d1,fpcr ;restore users exceptions
|
|
FMOVE.X (sp)+,FP0 ;last inst - posibble exception set
|
|
|
|
bra t_frcinx
|
|
|
|
|
|
; Routine "REDUCEX" converted to subroutine with additional code to do a <T3>
|
|
; single REMAINDER step via subtraction if abs(input) >= $7ffe 0000 ffff0000 00000000 <T3>
|
|
; <5/7/91, JPO>. <T3>
|
|
|
|
REDUCEX:
|
|
*--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
|
|
*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
|
|
*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
|
|
|
|
FMOVEM.X FP2-FP5,-(A7) ...save FP2 through FP5
|
|
MOVE.L D2,-(A7)
|
|
FMOVE.S #"$00000000",FP1 ; initial argument in FP0/FP1
|
|
|
|
; If compact form of abs(arg) in D0 = $7ffeffff, argument is so large <T3> thru next <T3>
|
|
; that there is danger of unwanted overflow in first loop iteration.
|
|
; In this case, reduce argument by one remainder step to make subsequent
|
|
; reduction safe <5/7/91, JPO>.
|
|
|
|
cmpi.l #$7ffeffff,d0 ; is argument dangerously large? <5/7/91, JPO>
|
|
bne.b LOOP ; no. <5/7/91, JPO>
|
|
|
|
move.l #$7ffe0000,FP_SCR2(a6) ; yes. create 2**16383*PI/2 at FP_SCR2 <5/7/91, JPO>
|
|
move.l #$c90fdaa2,FP_SCR2+4(a6)
|
|
clr.l FP_SCR2+8(a6)
|
|
ftest.x fp0 ; test sign of argument <5/7/91, JPO>
|
|
move.l #$7fdc0000,FP_SCR3(a6) ; create low half of 2**16383*PI/2
|
|
move.l #$85a308d3,FP_SCR3+4(a6) ; at FP_SCR3 <5/7/91, JPO>
|
|
clr.l FP_SCR3+8(a6)
|
|
fblt.w @1 ; negative arg <5/7/91, JPO>
|
|
|
|
or.w #$8000,FP_SCR2(a6) ; positive arg. negate FP_SCR2
|
|
or.w #$8000,FP_SCR3(a6) ; and FP_SCR3 <5/7/91, JPO>
|
|
@1:
|
|
fadd.x FP_SCR2(a6),fp0 ; high part of reduction <5/7/91, JPO>
|
|
fmove.x fp0,fp1 ; save high result in fp1 <5/7/91, JPO>
|
|
fadd.x FP_SCR3(a6),fp0 ; low part of reduction <5/7/91, JPO>
|
|
fsub.x fp0,fp1 ; determine tail of result <5/7/91, JPO>
|
|
fadd.x FP_SCR3(a6),fp1 ; fp0/fp1 are reduced argument <5/7/91, JPO> <T3>
|
|
|
|
*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
|
|
*--integer quotient will be stored in N
|
|
*--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
|
|
|
|
; On entry, D0 contains compact form of abs(input). Use this value
|
|
; to determine if argument is so large (D0 = $7ffeffff) that there
|
|
; is a danger of spurious overflow in reduction calculation. If so,
|
|
; reduce argument by one remainder step by subtracting 2**16383*PI/2
|
|
; from the argument and continuing the reduction normally.
|
|
|
|
LOOP:
|
|
FMOVE.X FP0,INARG(a6) ...+-2**K * F, 1 <= F < 2
|
|
MOVE.W INARG(a6),D0
|
|
MOVE.L D0,A1 ...save a copy of D0
|
|
ANDI.L #$00007FFF,D0
|
|
SUBI.L #$00003FFF,D0 ...D0 IS K
|
|
CMPI.L #28,D0
|
|
BLE.B LASTLOOP
|
|
;CONTLOOP: ; label not referenced <1/7/91, JPO>
|
|
; SUBI.L #28,D0 ...D0 IS L := K-28 - DELETED <6/13/91, JPO> <T4>
|
|
SUBI.L #27,D0 ...D0 IS L := K-27 - ADDED <6/13/91, JPO> <T4>
|
|
MOVE.L #0,ENDFLAG(a6)
|
|
BRA.B WORK
|
|
LASTLOOP:
|
|
CLR.L D0 ...D0 IS L := 0
|
|
MOVE.L #1,ENDFLAG(a6)
|
|
|
|
WORK:
|
|
*--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
|
|
*--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
|
|
|
|
*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
|
|
*--2**L * (PIby2_1), 2**L * (PIby2_2)
|
|
|
|
MOVE.L #$00003FFE,D2 ...BIASED EXPO OF 2/PI
|
|
SUB.L D0,D2 ...BIASED EXPO OF 2**(-L)*(2/PI)
|
|
|
|
MOVE.L #$A2F9836E,FP_SCR1+4(a6)
|
|
MOVE.L #$4E44152A,FP_SCR1+8(a6)
|
|
MOVE.W D2,FP_SCR1(a6) ...FP_SCR1 is 2**(-L)*(2/PI)
|
|
|
|
FMOVE.X FP0,FP2
|
|
FMUL.X FP_SCR1(a6),FP2
|
|
*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
|
|
*--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
|
|
*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
|
|
*--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
|
|
*--US THE DESIRED VALUE IN FLOATING POINT.
|
|
|
|
*--HIDE SIX CYCLES OF INSTRUCTION
|
|
MOVE.L A1,D2
|
|
SWAP D2
|
|
ANDI.L #$80000000,D2
|
|
ORI.L #$5F000000,D2 ...D2 IS SIGN(INARG)*2**63 IN SGL
|
|
MOVE.L D2,TWOTO63(a6)
|
|
|
|
MOVE.L D0,D2
|
|
ADDI.L #$00003FFF,D2 ...BIASED EXPO OF 2**L * (PI/2)
|
|
|
|
*--FP2 IS READY
|
|
FADD.S TWOTO63(a6),FP2 ...THE FRACTIONAL PART OF FP1 IS ROUNDED
|
|
|
|
*--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
|
|
MOVE.W D2,FP_SCR2(a6)
|
|
CLR.W FP_SCR2+2(a6)
|
|
MOVE.L #$C90FDAA2,FP_SCR2+4(a6)
|
|
CLR.L FP_SCR2+8(a6) ...FP_SCR2 is 2**(L) * Piby2_1
|
|
|
|
*--FP2 IS READY
|
|
FSUB.S TWOTO63(a6),FP2 ...FP2 is N
|
|
|
|
ADDI.L #$00003FDD,D0
|
|
MOVE.W D0,FP_SCR3(a6)
|
|
CLR.W FP_SCR3+2(a6)
|
|
MOVE.L #$85A308D3,FP_SCR3+4(a6)
|
|
CLR.L FP_SCR3+8(a6) ...FP_SCR3 is 2**(L) * Piby2_2
|
|
|
|
MOVE.L ENDFLAG(a6),D0
|
|
|
|
*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
|
|
*--P2 = 2**(L) * Piby2_2
|
|
FMOVE.X FP2,FP4
|
|
FMul.X FP_SCR2(a6),FP4 ...W = N*P1
|
|
FMove.X FP2,FP5
|
|
FMul.X FP_SCR3(a6),FP5 ...w = N*P2
|
|
FMove.X FP4,FP3
|
|
*--we want P+p = W+w but |p| <= half ulp of P
|
|
*--Then, we need to compute A := R-P and a := r-p
|
|
FAdd.X FP5,FP3 ...FP3 is P
|
|
FSub.X FP3,FP4 ...W-P
|
|
|
|
FSub.X FP3,FP0 ...FP0 is A := R - P
|
|
FAdd.X FP5,FP4 ...FP4 is p = (W-P)+w
|
|
|
|
FMove.X FP0,FP3 ...FP3 A
|
|
FSub.X FP4,FP1 ...FP1 is a := r - p
|
|
|
|
*--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
|
|
*--|r| <= half ulp of R.
|
|
FAdd.X FP1,FP0 ...FP0 is R := A+a
|
|
*--No need to calculate r if this is the last loop
|
|
; CMPI.L #0,D0 ; DELETED <5/7/91, JPO> <T3>
|
|
TST.L D0 ; <5/7/91, JPO> <T3>
|
|
BGT.B TRESTORE ; label renamed <1/7/91, JPO>
|
|
|
|
*--Need to calculate r
|
|
FSub.X FP0,FP3 ...A-R
|
|
FAdd.X FP3,FP1 ...FP1 is r := (A-R)+a
|
|
BRA.W LOOP
|
|
|
|
TRESTORE: ; label renamed <1/7/91, JPO>
|
|
; FMOVE.L FP2,NTAN(a6) ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
FMOVE.L FP2,NTRIG(a6) ; variable RENAMED <5/7/91, JPO> <T3>
|
|
MOVE.L (A7)+,D2
|
|
FMOVEM.X (A7)+,FP2-FP5
|
|
|
|
|
|
; MOVE.L NTAN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
; ROR.L #1,D0 ; DELETED from subroutine <5/7/91, JPO> <T3>
|
|
|
|
|
|
; BRA.W TANCONT ; DELETED from subroutine <5/7/91, JPO> <T3>
|
|
|
|
RTS ; return from subroutine <5/7/91, JPO> <T3>
|
|
|
|
|