mirror of
https://github.com/elliotnunn/mac-rom.git
synced 2024-12-28 01:29:20 +00:00
4325cdcc78
Resource forks are included only for .rsrc files. These are DeRezzed into their data fork. 'ckid' resources, from the Projector VCS, are not included. The Tools directory, containing mostly junk, is also excluded.
1188 lines
32 KiB
Plaintext
1188 lines
32 KiB
Plaintext
;
|
|
; File: Trig.a
|
|
;
|
|
; Contains: Routines to emulate trigonometric functions
|
|
;
|
|
; Originally Written by: Motorola Inc.
|
|
; Adapted to Apple/MPW: Jon Okada
|
|
;
|
|
; Copyright: © 1990, 1991 by Apple Computer, Inc., all rights reserved.
|
|
;
|
|
; This file is used in these builds: Mac32
|
|
;
|
|
; Change History (most recent first):
|
|
;
|
|
; <3+> 6/24/91 BG Modified trig argument reduction routine per Motorola
|
|
; version 2.0 to fix sign bug for FCOS of large argument
|
|
; (order of 10**8).
|
|
; <3> 5/24/91 BG Cleanup and optimization to the Sin and Tan routines.
|
|
; <2> 3/30/91 BG Rolling in Jon Okada's latest changes.
|
|
; <1> 12/14/90 BG First checked into TERROR/BBS.
|
|
|
|
; trig.a
|
|
|
|
; Based upon Motorola files 'ssin.sa', 'sto_res.sa', and 'stan.sa'.
|
|
|
|
|
|
; ssin
|
|
|
|
; CHANGE LOG:
|
|
; 07 Jan 91 JPO Deleted constants BOUNDS1, INVTWOPI, TWOPI1, and
|
|
; TWOPI2 (not referenced). Moved constants to
|
|
; file 'constants.a'. Deleted variable equate for
|
|
; XFRAC (not referenced). Renamed variable labels X,
|
|
; XDCARE, and N to XSIN, XSINDC, and NSIN, respectively.
|
|
; Deleted labels "NODD" and "c_is_fp3" (not referenced).
|
|
; Renamed labels REDUCEX, LOOP, WORK, LASTLOOP, and
|
|
; RESTORE to SREDUCEX, SLOOP, SWORK, SLASTLOOP, and
|
|
; SRESTORE, respectively. Appended routine "sto_cos"
|
|
; from Motorola file 'sto_res.sa'.
|
|
; 07 May 91 JPO Renamed variable NSIN to NTRIG. Deleted variable NTAN
|
|
; and changed all references to it to references to NTRIG.
|
|
; Removed routines "SINBORS", "SCBORS", and "TANBORS"
|
|
; (not referenced). Removed routine "SREDUCEX" and converted
|
|
; all branches to it to subroutine calls to "REDUCEX".
|
|
; Converted routine "REDUCEX" to a subroutine and all
|
|
; branches to it to subroutine calls. Added code in
|
|
; "REDUCEX" to do a single remainder step if the input
|
|
; is very large in order to prevent unwanted overflow.
|
|
; 13 Jun 91 JPO Modified trig argument reduction routine per Motorola
|
|
; version 2.0 to fix sign bug for FCOS of large argument
|
|
; (order of 10**8).
|
|
;
|
|
|
|
*
|
|
* ssin.sa 3.1 12/10/90
|
|
*
|
|
* The entry point sSIN computes the sine of an input argument
|
|
* sCOS computes the cosine, and sSINCOS computes both. The
|
|
* corresponding entry points with a "d" computes the same
|
|
* corresponding function values for denormalized inputs.
|
|
*
|
|
* Input: Double-extended number X in location pointed to
|
|
* by address register a0.
|
|
*
|
|
* Output: The funtion value sin(X) or cos(X) returned in Fp0 if SIN or
|
|
* COS is requested. Otherwise, for SINCOS, sin(X) is returned
|
|
* in Fp0, and cos(X) is returned in Fp1.
|
|
*
|
|
* Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
|
|
*
|
|
* Accuracy and Monotonicity: The returned result is within 1 ulp in
|
|
* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
|
|
* result is subsequently rounded to double precision. The
|
|
* result is provably monotonic in double precision.
|
|
*
|
|
* Speed: The programs sSIN and sCOS take approximately 150 cycles for
|
|
* input argument X such that |X| < 15Pi, which is the the usual
|
|
* situation. The speed for sSINCOS is approximately 190 cycles.
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* SIN and COS:
|
|
* 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
|
|
*
|
|
* 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
|
|
*
|
|
* 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
|
|
* k = N mod 4, so in particular, k = 0,1,2,or 3. Overwirte
|
|
* k by k := k + AdjN.
|
|
*
|
|
* 4. If k is even, go to 6.
|
|
*
|
|
* 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
|
|
* where cos(r) is approximated by an even polynomial in r,
|
|
* 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r.
|
|
* Exit.
|
|
*
|
|
* 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
|
|
* where sin(r) is approximated by an odd polynomial in r
|
|
* r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r.
|
|
* Exit.
|
|
*
|
|
* 7. If |X| > 1, go to 9.
|
|
*
|
|
* 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
|
|
*
|
|
* 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
|
|
*
|
|
* SINCOS:
|
|
* 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
|
|
*
|
|
* 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
|
|
* k = N mod 4, so in particular, k = 0,1,2,or 3.
|
|
*
|
|
* 3. If k is even, go to 5.
|
|
*
|
|
* 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
|
|
* j1 exclusive or with the l.s.b. of k.
|
|
* sgn1 := (-1)**j1, sgn2 := (-1)**j2.
|
|
* SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
|
|
* sin(r) and cos(r) are computed as odd and even polynomials
|
|
* in r, respectively. Exit
|
|
*
|
|
* 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
|
|
* SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
|
|
* sin(r) and cos(r) are computed as odd and even polynomials
|
|
* in r, respectively. Exit
|
|
*
|
|
* 6. If |X| > 1, go to 8.
|
|
*
|
|
* 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
|
|
*
|
|
* 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
|
|
*
|
|
|
|
* Copyright (C) Motorola, Inc. 1990
|
|
* All Rights Reserved
|
|
*
|
|
* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
|
|
* The copyright notice above does not evidence any
|
|
* actual or intended publication of such source code.
|
|
|
|
* SSIN IDNT 2,1 Motorola 040 Floating Point Software Package
|
|
|
|
|
|
INARG equ FP_SCR4
|
|
|
|
;X equ FP_SCR5 ; Renamed <1/7/91, JPO>
|
|
;XDCARE equ X+2 ; Renamed <1/7/91, JPO>
|
|
;XFRAC equ X+4 ; Deleted <1/7/91, JPO>
|
|
|
|
XSIN EQU FP_SCR5 ; <1/7/91, JPO>
|
|
XSINDC EQU XSIN+2
|
|
|
|
RPRIME equ FP_SCR1
|
|
SPRIME equ FP_SCR2
|
|
|
|
POSNEG1 equ L_SCR1
|
|
TWOTO63 equ L_SCR1
|
|
|
|
ENDFLAG equ L_SCR2
|
|
|
|
;N equ L_SCR2 ; Renamed <1/7/91, JPO>
|
|
|
|
;NSIN equ L_SCR2 ; <1/7/91, JPO> - RENAMED <T3>
|
|
NTRIG equ L_SCR2 ; <5/7/91, JPO> <T3>
|
|
|
|
ADJN equ L_SCR3
|
|
|
|
|
|
ssind:
|
|
*--SIN(X) = X FOR DENORMALIZED X
|
|
bra t_extdnrm
|
|
|
|
|
|
scosd:
|
|
*--COS(X) = 1 FOR DENORMALIZED X
|
|
|
|
FMOVE.S #"$3F800000",FP0
|
|
*
|
|
* 9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
|
|
*
|
|
fmove.l #0,fpsr
|
|
*
|
|
bra t_frcinx
|
|
|
|
|
|
ssin:
|
|
*--SET ADJN TO 0
|
|
MOVE.L #0,ADJN(a6)
|
|
BRA.B SINBGN
|
|
|
|
|
|
scos:
|
|
*--SET ADJN TO 1
|
|
MOVE.L #1,ADJN(a6)
|
|
|
|
SINBGN:
|
|
*--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
|
|
|
|
FMOVE.X (a0),FP0 ...LOAD INPUT
|
|
|
|
MOVE.L (A0),D0
|
|
MOVE.W 4(A0),D0
|
|
FMOVE.X FP0,XSIN(a6) ; <1/7/91, JPO>
|
|
ANDI.L #$7FFFFFFF,D0 ...COMPACTIFY X
|
|
|
|
CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)?
|
|
BGE.B SOK1
|
|
BRA.W SINSM
|
|
|
|
SOK1:
|
|
CMPI.L #$4004BC7E,D0 ...|X| < 15 PI?
|
|
BLT.B SINMAIN
|
|
; BRA.W SREDUCEX ; label renamed <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
BSR.W REDUCEX ; NEW subroutine <5/7/91, JPO> <T3>
|
|
BRA.B SINCONT
|
|
|
|
SINMAIN:
|
|
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
|
|
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
|
|
FMOVE.X FP0,FP1
|
|
FMUL.D TWOBYPI,FP1 ...X*2/PI
|
|
|
|
*--HIDE THE NEXT THREE INSTRUCTIONS
|
|
LEA PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32
|
|
|
|
|
|
*--FP1 IS NOW READY
|
|
; FMOVE.L FP1,NSIN(a6) ...CONVERT TO INTEGER <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
FMOVE.L FP1,NTRIG(a6) ; variable RENAMED <5/7/91, JPO> <T3>
|
|
|
|
; MOVE.L NSIN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
MOVE.L NTRIG(a6),D0 ; variable RENAMED <5/7/91, JPO> <T3>
|
|
|
|
ASL.L #4,D0
|
|
ADDA.L D0,A1 ...A1 IS THE ADDRESS OF N*PIBY2
|
|
* ...WHICH IS IN TWO PIECES Y1 & Y2
|
|
|
|
FSUB.X (A1)+,FP0 ...X-Y1
|
|
*--HIDE THE NEXT ONE
|
|
FSUB.S (A1),FP0 ...FP0 IS R = (X-Y1)-Y2
|
|
|
|
SINCONT:
|
|
*--continuation from REDUCEX
|
|
|
|
*--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
|
|
; MOVE.L NSIN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
MOVE.L NTRIG(a6),D0 ; variable RENAMED <5/7/91, JPO> <T3>
|
|
ADD.L ADJN(a6),D0 ...SEE IF D0 IS ODD OR EVEN
|
|
ROR.L #1,D0 ...D0 WAS ODD IFF D0 IS NEGATIVE
|
|
; CMPI.L #0,D0 ; DELETED (unnecessary) <5/7/91, JPO> <T3>
|
|
; BLT.B COSPOLY ; DELETED <5/7/91, JPO> <T3>
|
|
BMI.B COSPOLY ; <5/7/91, JPO> <T3>
|
|
|
|
SINPOLY:
|
|
*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
|
|
*--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
|
|
*--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
|
|
*--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
|
|
*--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
|
|
*--WHERE T=S*S.
|
|
*--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
|
|
*--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
|
|
FMOVE.X FP0,XSIN(a6) ...X IS R <1/7/91, JPO>
|
|
FMUL.X FP0,FP0 ...FP0 IS S
|
|
*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
|
|
FMOVE.D SINA7,FP3
|
|
FMOVE.D SINA6,FP2
|
|
*--FP0 IS NOW READY
|
|
FMOVE.X FP0,FP1
|
|
FMUL.X FP1,FP1 ...FP1 IS T
|
|
*--HIDE THE NEXT TWO WHILE WAITING FOR FP1
|
|
|
|
ROR.L #1,D0
|
|
ANDI.L #$80000000,D0
|
|
* ...LEAST SIG. BIT OF D0 IN SIGN POSITION
|
|
EOR.L D0,XSIN(a6) ...X IS NOW R'= SGN*R <1/7/91, JPO>
|
|
|
|
FMUL.X FP1,FP3 ...TA7
|
|
FMUL.X FP1,FP2 ...TA6
|
|
|
|
FADD.D SINA5,FP3 ...A5+TA7
|
|
FADD.D SINA4,FP2 ...A4+TA6
|
|
|
|
FMUL.X FP1,FP3 ...T(A5+TA7)
|
|
FMUL.X FP1,FP2 ...T(A4+TA6)
|
|
|
|
FADD.D SINA3,FP3 ...A3+T(A5+TA7)
|
|
FADD.X SINA2,FP2 ...A2+T(A4+TA6)
|
|
|
|
FMUL.X FP3,FP1 ...T(A3+T(A5+TA7))
|
|
|
|
FMUL.X FP0,FP2 ...S(A2+T(A4+TA6))
|
|
FADD.X SINA1,FP1 ...A1+T(A3+T(A5+TA7))
|
|
FMUL.X XSIN(a6),FP0 ...R'*S <1/7/91, JPO>
|
|
|
|
FADD.X FP2,FP1 ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
|
|
*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
|
|
*--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING
|
|
|
|
|
|
FMUL.X FP1,FP0 ...SIN(R')-R'
|
|
*--FP1 RELEASED.
|
|
|
|
FMOVE.L d1,FPCR ;restore users exceptions
|
|
FADD.X XSIN(a6),FP0 ;last inst - possible exception set <1/7/91, JPO>
|
|
bra t_frcinx
|
|
|
|
|
|
COSPOLY:
|
|
*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
|
|
*--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
|
|
*--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
|
|
*--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
|
|
*--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
|
|
*--WHERE T=S*S.
|
|
*--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
|
|
*--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
|
|
*--AND IS THEREFORE STORED AS SINGLE PRECISION.
|
|
|
|
FMUL.X FP0,FP0 ...FP0 IS S
|
|
*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
|
|
FMOVE.D COSB8,FP2
|
|
FMOVE.D COSB7,FP3
|
|
*--FP0 IS NOW READY
|
|
FMOVE.X FP0,FP1
|
|
FMUL.X FP1,FP1 ...FP1 IS T
|
|
*--HIDE THE NEXT TWO WHILE WAITING FOR FP1
|
|
FMOVE.X FP0,XSIN(a6) ...X IS S <1/7/91, JPO>
|
|
ROR.L #1,D0
|
|
ANDI.L #$80000000,D0
|
|
* ...LEAST SIG. BIT OF D0 IN SIGN POSITION
|
|
|
|
FMUL.X FP1,FP2 ...TB8
|
|
*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
|
|
EOR.L D0,XSIN(a6) ...X IS NOW S'= SGN*S <1/7/91, JPO>
|
|
ANDI.L #$80000000,D0
|
|
|
|
FMUL.X FP1,FP3 ...TB7
|
|
*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
|
|
ORI.L #$3F800000,D0 ...D0 IS SGN IN SINGLE
|
|
MOVE.L D0,POSNEG1(a6)
|
|
|
|
FADD.D COSB6,FP2 ...B6+TB8
|
|
FADD.D COSB5,FP3 ...B5+TB7
|
|
|
|
FMUL.X FP1,FP2 ...T(B6+TB8)
|
|
FMUL.X FP1,FP3 ...T(B5+TB7)
|
|
|
|
FADD.D COSB4,FP2 ...B4+T(B6+TB8)
|
|
FADD.X COSB3,FP3 ...B3+T(B5+TB7)
|
|
|
|
FMUL.X FP1,FP2 ...T(B4+T(B6+TB8))
|
|
FMUL.X FP3,FP1 ...T(B3+T(B5+TB7))
|
|
|
|
FADD.X COSB2,FP2 ...B2+T(B4+T(B6+TB8))
|
|
FADD.S COSB1,FP1 ...B1+T(B3+T(B5+TB7))
|
|
|
|
FMUL.X FP2,FP0 ...S(B2+T(B4+T(B6+TB8)))
|
|
*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
|
|
*--FP2 RELEASED.
|
|
|
|
|
|
FADD.X FP1,FP0
|
|
*--FP1 RELEASED
|
|
|
|
FMUL.X XSIN(a6),FP0 ; <1/7/91, JPO>
|
|
|
|
FMOVE.L d1,FPCR ;restore users exceptions
|
|
FADD.S POSNEG1(a6),FP0 ;last inst - possible exception set
|
|
bra t_frcinx
|
|
|
|
|
|
;SINBORS: ; routine DELETED - <5/7/91, JPO> <T3>
|
|
*--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
|
|
*--IF |X| < 2**(-40), RETURN X OR 1.
|
|
; CMPI.L #$3FFF8000,D0 <T3>
|
|
; BGT.B SREDUCEX ; label renamed <1/7/91, JPO> <T3>
|
|
|
|
|
|
SINSM:
|
|
MOVE.L ADJN(a6),D0
|
|
; CMPI.L #0,D0 ; DELETED (unnecessary) <5/7/91, JPO> <T3>
|
|
BGT.B COSTINY
|
|
|
|
SINTINY:
|
|
MOVE.W #$0000,XSINDC(a6) ...JUST IN CASE <1/7/91, JPO>
|
|
FMOVE.L d1,FPCR ;restore users exceptions
|
|
FMOVE.X XSIN(a6),FP0 ;last inst - possible exception set <1/7/91, JPO>
|
|
bra t_frcinx
|
|
|
|
|
|
COSTINY:
|
|
FMOVE.S #"$3F800000",FP0
|
|
|
|
FMOVE.L d1,FPCR ;restore users exceptions
|
|
FSUB.S #"$00800000",FP0 ;last inst - possible exception set
|
|
bra t_frcinx
|
|
|
|
|
|
; Routine "SREDUCEX" is DELETED and replaced by subroutine "REDUCEX" <T3> thru next <T3>
|
|
; (below) <5/7/91, JPO>.
|
|
|
|
;SREDUCEX: ; label renamed <1/7/91, JPO>
|
|
*--WHEN SREDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
|
|
*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
|
|
*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
|
|
|
|
; FMOVEM.X FP2-FP5,-(A7) ...save FP2 through FP5
|
|
; MOVE.L D2,-(A7)
|
|
; FMOVE.S #"$00000000",FP1
|
|
|
|
*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
|
|
*--integer quotient will be stored in N
|
|
*--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
|
|
|
|
;SLOOP: ; label renamed <1/7/91, JPO>
|
|
; FMOVE.X FP0,INARG(a6) ...+-2**K * F, 1 <= F < 2
|
|
; MOVE.W INARG(a6),D0
|
|
; MOVE.L D0,A1 ...save a copy of D0
|
|
; ANDI.L #$00007FFF,D0
|
|
; SUBI.L #$00003FFF,D0 ...D0 IS K
|
|
; CMPI.L #28,D0
|
|
; BLE.B SLASTLOOP ; label renamed <1/7/91, JPO>
|
|
;CONTLOOP: ; label not referenced <1/7/91, JPO>
|
|
; SUBI.L #28,D0 ...D0 IS L := K-28
|
|
; MOVE.L #0,ENDFLAG(a6)
|
|
; BRA.B SWORK ; label renamed <1/7/91, JPO>
|
|
;SLASTLOOP: ; label renamed <1/7/91, JPO>
|
|
; CLR.L D0 ...D0 IS L := 0
|
|
; MOVE.L #1,ENDFLAG(a6)
|
|
|
|
;SWORK: ; label renamed <1/7/91, JPO>
|
|
*--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
|
|
*--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
|
|
|
|
*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
|
|
*--2**L * (PIby2_1), 2**L * (PIby2_2)
|
|
|
|
; MOVE.L #$00003FFE,D2 ...BIASED EXPO OF 2/PI
|
|
; SUB.L D0,D2 ...BIASED EXPO OF 2**(-L)*(2/PI)
|
|
|
|
; MOVE.L #$A2F9836E,FP_SCR1+4(a6)
|
|
; MOVE.L #$4E44152A,FP_SCR1+8(a6)
|
|
; MOVE.W D2,FP_SCR1(a6) ...FP_SCR1 is 2**(-L)*(2/PI)
|
|
|
|
; FMOVE.X FP0,FP2
|
|
; FMUL.X FP_SCR1(a6),FP2
|
|
*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
|
|
*--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
|
|
*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
|
|
*--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
|
|
*--US THE DESIRED VALUE IN FLOATING POINT.
|
|
|
|
*--HIDE SIX CYCLES OF INSTRUCTION
|
|
; MOVE.L A1,D2
|
|
; SWAP D2
|
|
; ANDI.L #$80000000,D2
|
|
; ORI.L #$5F000000,D2 ...D2 IS SIGN(INARG)*2**63 IN SGL
|
|
; MOVE.L D2,TWOTO63(a6)
|
|
|
|
; MOVE.L D0,D2
|
|
; ADDI.L #$00003FFF,D2 ...BIASED EXPO OF 2**L * (PI/2)
|
|
|
|
*--FP2 IS READY
|
|
; FADD.S TWOTO63(a6),FP2 ...THE FRACTIONAL PART OF FP1 IS ROUNDED
|
|
|
|
*--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
|
|
; MOVE.W D2,FP_SCR2(a6)
|
|
; CLR.W FP_SCR2+2(a6)
|
|
; MOVE.L #$C90FDAA2,FP_SCR2+4(a6)
|
|
; CLR.L FP_SCR2+8(a6) ...FP_SCR2 is 2**(L) * Piby2_1
|
|
|
|
*--FP2 IS READY
|
|
; FSUB.S TWOTO63(a6),FP2 ...FP2 is N
|
|
|
|
; ADDI.L #$00003FDD,D0
|
|
; MOVE.W D0,FP_SCR3(a6)
|
|
; CLR.W FP_SCR3+2(a6)
|
|
; MOVE.L #$85A308D3,FP_SCR3+4(a6)
|
|
; CLR.L FP_SCR3+8(a6) ...FP_SCR3 is 2**(L) * Piby2_2
|
|
|
|
; MOVE.L ENDFLAG(a6),D0
|
|
|
|
*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
|
|
*--P2 = 2**(L) * Piby2_2
|
|
; FMOVE.X FP2,FP4
|
|
; FMul.X FP_SCR2(a6),FP4 ...W = N*P1
|
|
; FMove.X FP2,FP5
|
|
; FMul.X FP_SCR3(a6),FP5 ...w = N*P2
|
|
; FMove.X FP4,FP3
|
|
*--we want P+p = W+w but |p| <= half ulp of P
|
|
*--Then, we need to compute A := R-P and a := r-p
|
|
; FAdd.X FP5,FP3 ...FP3 is P
|
|
; FSub.X FP3,FP4 ...W-P
|
|
|
|
; FSub.X FP3,FP0 ...FP0 is A := R - P
|
|
; FAdd.X FP5,FP4 ...FP4 is p = (W-P)+w
|
|
|
|
; FMove.X FP0,FP3 ...FP3 A
|
|
; FSub.X FP4,FP1 ...FP1 is a := r - p
|
|
|
|
*--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
|
|
*--|r| <= half ulp of R.
|
|
; FAdd.X FP1,FP0 ...FP0 is R := A+a
|
|
*--No need to calculate r if this is the last loop
|
|
; CMPI.L #0,D0
|
|
; BGT.B SRESTORE ; label renamed <1/7/91, JPO>
|
|
|
|
*--Need to calculate r
|
|
; FSub.X FP0,FP3 ...A-R
|
|
; FAdd.X FP3,FP1 ...FP1 is r := (A-R)+a
|
|
; BRA.W SLOOP
|
|
|
|
;SRESTORE: ; label renamed <1/7/91, JPO>
|
|
; FMOVE.L FP2,NSIN(a6)
|
|
; MOVE.L (A7)+,D2
|
|
; FMOVEM.X (A7)+,FP2-FP5
|
|
|
|
|
|
; MOVE.L ADJN(a6),D0
|
|
; CMPI.L #4,D0
|
|
|
|
; BLT.W SINCONT
|
|
; BRA.B SCCONT <T3>
|
|
|
|
|
|
ssincosd:
|
|
*--SIN AND COS OF X FOR DENORMALIZED X
|
|
|
|
FMOVE.S #"$3F800000",FP1
|
|
bsr sto_cos ;store cosine result
|
|
bra t_extdnrm
|
|
|
|
|
|
ssincos:
|
|
*--SET ADJN TO 4
|
|
MOVE.L #4,ADJN(a6)
|
|
|
|
FMOVE.X (a0),FP0 ...LOAD INPUT
|
|
|
|
MOVE.L (A0),D0
|
|
MOVE.W 4(A0),D0
|
|
FMOVE.X FP0,XSIN(a6) ; <1/7/91, JPO>
|
|
ANDI.L #$7FFFFFFF,D0 ...COMPACTIFY X
|
|
|
|
CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)?
|
|
BGE.B SCOK1
|
|
BRA.W SCSM
|
|
|
|
SCOK1:
|
|
CMPI.L #$4004BC7E,D0 ...|X| < 15 PI?
|
|
BLT.B SCMAIN
|
|
; BRA.W SREDUCEX ; label renamed - DELETED <5/7/91, JPO> <T3>
|
|
BSR.W REDUCEX ; NEW subroutine <5/7/91, JPO> <T3>
|
|
BRA.B SCCONT ; continue below <5/7/91, JPO> <T3>
|
|
|
|
|
|
SCMAIN:
|
|
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
|
|
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
|
|
FMOVE.X FP0,FP1
|
|
FMUL.D TWOBYPI,FP1 ...X*2/PI
|
|
|
|
*--HIDE THE NEXT THREE INSTRUCTIONS
|
|
LEA PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32
|
|
|
|
|
|
*--FP1 IS NOW READY
|
|
; FMOVE.L FP1,NSIN(a6) ...CONVERT TO INTEGER <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
FMOVE.L FP1,NTRIG(a6) ; variable RENAMED <5/7/91, JPO> <T3>
|
|
|
|
; MOVE.L NSIN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
MOVE.L NTRIG(a6),D0 ; variable RENAMED <5/7/91, JPO> <T3>
|
|
ASL.L #4,D0
|
|
ADDA.L D0,A1 ...ADDRESS OF N*PIBY2, IN Y1, Y2
|
|
|
|
FSUB.X (A1)+,FP0 ...X-Y1
|
|
FSUB.S (A1),FP0 ...FP0 IS R = (X-Y1)-Y2
|
|
|
|
SCCONT:
|
|
*--continuation point from REDUCEX
|
|
|
|
*--HIDE THE NEXT TWO
|
|
; MOVE.L NSIN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
MOVE.L NTRIG(a6),D0 ; variable RENAMED <5/7/91, JPO> <T3>
|
|
ROR.L #1,D0
|
|
|
|
; CMPI.L #0,D0 ...D0 < 0 IFF N IS ODD - DELETED (unnecessary) <5/7/91, JPO> <T3>
|
|
; BGE.W NEVEN ; DELETED <5/7/91, JPO> <T3>
|
|
BPL.W NEVEN ; <5/7/91, JPO> <T3>
|
|
|
|
;NODD: ; label DELETED <1/7/91, JPO>
|
|
*--REGISTERS SAVED SO FAR: D0, A0, FP2.
|
|
|
|
FMOVE.X FP0,RPRIME(a6)
|
|
FMUL.X FP0,FP0 ...FP0 IS S = R*R
|
|
FMOVE.D SINA7,FP1 ...A7
|
|
FMOVE.D COSB8,FP2 ...B8
|
|
FMUL.X FP0,FP1 ...SA7
|
|
MOVE.L d2,-(A7)
|
|
MOVE.L D0,d2
|
|
FMUL.X FP0,FP2 ...SB8
|
|
ROR.L #1,d2
|
|
ANDI.L #$80000000,d2
|
|
|
|
FADD.D SINA6,FP1 ...A6+SA7
|
|
EOR.L D0,d2
|
|
ANDI.L #$80000000,d2
|
|
FADD.D COSB7,FP2 ...B7+SB8
|
|
|
|
FMUL.X FP0,FP1 ...S(A6+SA7)
|
|
EOR.L d2,RPRIME(a6)
|
|
MOVE.L (A7)+,d2
|
|
FMUL.X FP0,FP2 ...S(B7+SB8)
|
|
ROR.L #1,D0
|
|
ANDI.L #$80000000,D0
|
|
|
|
FADD.D SINA5,FP1 ...A5+S(A6+SA7)
|
|
MOVE.L #$3F800000,POSNEG1(a6)
|
|
EOR.L D0,POSNEG1(a6)
|
|
FADD.D COSB6,FP2 ...B6+S(B7+SB8)
|
|
|
|
FMUL.X FP0,FP1 ...S(A5+S(A6+SA7))
|
|
FMUL.X FP0,FP2 ...S(B6+S(B7+SB8))
|
|
FMOVE.X FP0,SPRIME(a6)
|
|
|
|
FADD.D SINA4,FP1 ...A4+S(A5+S(A6+SA7))
|
|
EOR.L D0,SPRIME(a6)
|
|
FADD.D COSB5,FP2 ...B5+S(B6+S(B7+SB8))
|
|
|
|
FMUL.X FP0,FP1 ...S(A4+...)
|
|
FMUL.X FP0,FP2 ...S(B5+...)
|
|
|
|
FADD.D SINA3,FP1 ...A3+S(A4+...)
|
|
FADD.D COSB4,FP2 ...B4+S(B5+...)
|
|
|
|
FMUL.X FP0,FP1 ...S(A3+...)
|
|
FMUL.X FP0,FP2 ...S(B4+...)
|
|
|
|
FADD.X SINA2,FP1 ...A2+S(A3+...)
|
|
FADD.X COSB3,FP2 ...B3+S(B4+...)
|
|
|
|
FMUL.X FP0,FP1 ...S(A2+...)
|
|
FMUL.X FP0,FP2 ...S(B3+...)
|
|
|
|
FADD.X SINA1,FP1 ...A1+S(A2+...)
|
|
FADD.X COSB2,FP2 ...B2+S(B3+...)
|
|
|
|
FMUL.X FP0,FP1 ...S(A1+...)
|
|
FMUL.X FP2,FP0 ...S(B2+...)
|
|
|
|
|
|
|
|
FMUL.X RPRIME(a6),FP1 ...R'S(A1+...)
|
|
FADD.S COSB1,FP0 ...B1+S(B2...)
|
|
FMUL.X SPRIME(a6),FP0 ...S'(B1+S(B2+...))
|
|
|
|
move.l d1,-(sp) ;restore users mode & precision
|
|
andi.l #$ff,d1 ;mask off all exceptions
|
|
fmove.l d1,FPCR
|
|
FADD.X RPRIME(a6),FP1 ...COS(X)
|
|
bsr sto_cos ;store cosine result
|
|
FMOVE.L (sp)+,FPCR ;restore users exceptions
|
|
FADD.S POSNEG1(a6),FP0 ...SIN(X)
|
|
|
|
bra t_frcinx
|
|
|
|
|
|
NEVEN:
|
|
*--REGISTERS SAVED SO FAR: FP2.
|
|
|
|
FMOVE.X FP0,RPRIME(a6)
|
|
FMUL.X FP0,FP0 ...FP0 IS S = R*R
|
|
FMOVE.D COSB8,FP1 ...B8
|
|
FMOVE.D SINA7,FP2 ...A7
|
|
FMUL.X FP0,FP1 ...SB8
|
|
FMOVE.X FP0,SPRIME(a6)
|
|
FMUL.X FP0,FP2 ...SA7
|
|
ROR.L #1,D0
|
|
ANDI.L #$80000000,D0
|
|
FADD.D COSB7,FP1 ...B7+SB8
|
|
FADD.D SINA6,FP2 ...A6+SA7
|
|
EOR.L D0,RPRIME(a6)
|
|
EOR.L D0,SPRIME(a6)
|
|
FMUL.X FP0,FP1 ...S(B7+SB8)
|
|
ORI.L #$3F800000,D0
|
|
MOVE.L D0,POSNEG1(a6)
|
|
FMUL.X FP0,FP2 ...S(A6+SA7)
|
|
|
|
FADD.D COSB6,FP1 ...B6+S(B7+SB8)
|
|
FADD.D SINA5,FP2 ...A5+S(A6+SA7)
|
|
|
|
FMUL.X FP0,FP1 ...S(B6+S(B7+SB8))
|
|
FMUL.X FP0,FP2 ...S(A5+S(A6+SA7))
|
|
|
|
FADD.D COSB5,FP1 ...B5+S(B6+S(B7+SB8))
|
|
FADD.D SINA4,FP2 ...A4+S(A5+S(A6+SA7))
|
|
|
|
FMUL.X FP0,FP1 ...S(B5+...)
|
|
FMUL.X FP0,FP2 ...S(A4+...)
|
|
|
|
FADD.D COSB4,FP1 ...B4+S(B5+...)
|
|
FADD.D SINA3,FP2 ...A3+S(A4+...)
|
|
|
|
FMUL.X FP0,FP1 ...S(B4+...)
|
|
FMUL.X FP0,FP2 ...S(A3+...)
|
|
|
|
FADD.X COSB3,FP1 ...B3+S(B4+...)
|
|
FADD.X SINA2,FP2 ...A2+S(A3+...)
|
|
|
|
FMUL.X FP0,FP1 ...S(B3+...)
|
|
FMUL.X FP0,FP2 ...S(A2+...)
|
|
|
|
FADD.X COSB2,FP1 ...B2+S(B3+...)
|
|
FADD.X SINA1,FP2 ...A1+S(A2+...)
|
|
|
|
FMUL.X FP0,FP1 ...S(B2+...)
|
|
fmul.x fp2,fp0 ...s(a1+...)
|
|
|
|
|
|
|
|
FADD.S COSB1,FP1 ...B1+S(B2...)
|
|
FMUL.X RPRIME(a6),FP0 ...R'S(A1+...)
|
|
FMUL.X SPRIME(a6),FP1 ...S'(B1+S(B2+...))
|
|
|
|
move.l d1,-(sp) ;save users mode & precision
|
|
andi.l #$ff,d1 ;mask off all exceptions
|
|
fmove.l d1,FPCR
|
|
FADD.S POSNEG1(a6),FP1 ...COS(X)
|
|
bsr.b sto_cos ;store cosine result
|
|
FMOVE.L (sp)+,FPCR ;restore users exceptions
|
|
FADD.X RPRIME(a6),FP0 ...SIN(X)
|
|
|
|
bra t_frcinx
|
|
|
|
;SCBORS: ; routine DELETED <5/7/91, JPO> <T3>
|
|
; CMPI.L #$3FFF8000,D0 ; <T3>
|
|
; BGT.W SREDUCEX ; label renamed <T3>
|
|
|
|
|
|
SCSM:
|
|
MOVE.W #$0000,XSINDC(a6) ; <1/7/91, JPO>
|
|
FMOVE.S #"$3F800000",FP1
|
|
|
|
move.l d1,-(sp) ;save users mode & precision
|
|
andi.l #$ff,d1 ;mask off all exceptions
|
|
fmove.l d1,FPCR
|
|
FSUB.S #"$00800000",FP1
|
|
bsr.b sto_cos ;store cosine result
|
|
FMOVE.L (sp)+,FPCR ;restore users exceptions
|
|
FMOVE.X XSIN(a6),FP0 ; <1/7/91, JPO>
|
|
bra t_frcinx
|
|
|
|
|
|
; sto_cos (from Motorola file 'sto_res.sa')
|
|
sto_cos:
|
|
bfextu CMDREG1B(a6){13:3},d0 ;extract cos destination
|
|
cmpi.b #3,d0 ;check for fp0/fp1 cases
|
|
ble.b c_fp0123
|
|
fmovem.x fp1,-(a7)
|
|
moveq.l #7,d1
|
|
sub.l d0,d1 ;d1 = 7- (dest. reg. no.)
|
|
clr.l d0
|
|
bset.l d1,d0 ;d0 is dynamic register mask
|
|
fmovem.x (a7)+,d0
|
|
rts
|
|
c_fp0123:
|
|
cmpi.b #0,d0
|
|
beq.b c_is_fp0
|
|
cmpi.b #1,d0
|
|
beq.b c_is_fp1
|
|
cmpi.b #2,d0
|
|
beq.b c_is_fp2
|
|
;c_is_fp3: ; label DELETED <1/7/91, JPO>
|
|
fmovem.x fp1,USER_FP3(a6)
|
|
rts
|
|
c_is_fp2:
|
|
fmovem.x fp1,USER_FP2(a6)
|
|
rts
|
|
c_is_fp1:
|
|
fmovem.x fp1,USER_FP1(a6)
|
|
rts
|
|
c_is_fp0:
|
|
fmovem.x fp1,USER_FP0(a6)
|
|
rts
|
|
|
|
|
|
|
|
|
|
; stan
|
|
|
|
; CHANGE LOG:
|
|
; 07 Jan 91 JPO Deleted constants BOUNDS1, TWOBYPI, INVTWOPI, TWOPI1, and
|
|
; TWOPI2. Moved constants and constant table PITBL to
|
|
; file 'constants.a'. Deleted variable equates for INARG,
|
|
; TWOTO63, and ENDFLAG (duplication). Renamed variable N
|
|
; to NTAN. Renamed label "RESTORE" to "TRESTORE".
|
|
; 07 May 91 JPO Deleted variable NTAN and changed all references to it to
|
|
; refer to NTRIG. Removed routine "TANBORS" (not referenced).
|
|
; Converted routine "REDUCEX" to a subroutine and all
|
|
; branches to it to subroutine calls. Added code in
|
|
; "REDUCEX" to do a single remainder step if the input
|
|
; is very large in order to prevent unwanted overflow.
|
|
;
|
|
|
|
*
|
|
* stan.sa 3.2 12/18/90
|
|
*
|
|
* The entry point stan computes the tangent of
|
|
* an input argument;
|
|
* stand does the same except for denormalized input.
|
|
*
|
|
* Input: Double-extended number X in location pointed to
|
|
* by address register a0.
|
|
*
|
|
* Output: The value tan(X) returned in floating-point register Fp0.
|
|
*
|
|
* Accuracy and Monotonicity: The returned result is within 3 ulp in
|
|
* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
|
|
* result is subsequently rounded to double precision. The
|
|
* result is provably monotonic in double precision.
|
|
*
|
|
* Speed: The program sTAN takes approximately 170 cycles for
|
|
* input argument X such that |X| < 15Pi, which is the the usual
|
|
* situation.
|
|
*
|
|
* Algorithm:
|
|
*
|
|
* 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
|
|
*
|
|
* 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
|
|
* k = N mod 2, so in particular, k = 0 or 1.
|
|
*
|
|
* 3. If k is odd, go to 5.
|
|
*
|
|
* 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
|
|
* rational function U/V where
|
|
* U = r + r*s*(P1 + s*(P2 + s*P3)), and
|
|
* V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r.
|
|
* Exit.
|
|
*
|
|
* 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
|
|
* rational function U/V where
|
|
* U = r + r*s*(P1 + s*(P2 + s*P3)), and
|
|
* V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
|
|
* -Cot(r) = -V/U. Exit.
|
|
*
|
|
* 6. If |X| > 1, go to 8.
|
|
*
|
|
* 7. (|X|<2**(-40)) Tan(X) = X. Exit.
|
|
*
|
|
* 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
|
|
*
|
|
|
|
* Copyright (C) Motorola, Inc. 1990
|
|
* All Rights Reserved
|
|
*
|
|
* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
|
|
* The copyright notice above does not evidence any
|
|
* actual or intended publication of such source code.
|
|
|
|
* STAN IDNT 2,1 Motorola 040 Floating Point Software Package
|
|
|
|
|
|
|
|
;INARG equ FP_SCR4 ; deleted <1/7/91, JPO>
|
|
|
|
;TWOTO63 equ L_SCR1 ; deleted <1/7/91, JPO>
|
|
;ENDFLAG equ L_SCR2 ; deleted <1/7/91, JPO>
|
|
|
|
;N equ L_SCR3 ; renamed <1/7/91, JPO>
|
|
;NTAN equ L_SCR3 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
|
|
|
|
stand:
|
|
*--TAN(X) = X FOR DENORMALIZED X
|
|
|
|
bra t_extdnrm
|
|
|
|
|
|
stan:
|
|
FMOVE.X (a0),FP0 ...LOAD INPUT
|
|
|
|
MOVE.L (A0),D0
|
|
MOVE.W 4(A0),D0
|
|
ANDI.L #$7FFFFFFF,D0
|
|
|
|
CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)?
|
|
BGE.B TANOK1
|
|
BRA.W TANSM
|
|
TANOK1:
|
|
CMPI.L #$4004BC7E,D0 ...|X| < 15 PI?
|
|
BLT.B TANMAIN
|
|
; BRA.W REDUCEX ; DELETED <5/7/91, JPO> <T3>
|
|
BSR.W REDUCEX ; NEW subroutine <5/7/91, JPO> <T3>
|
|
MOVE.L NTRIG(A6),D0 ; get N <5/7/91, JPO> <T3>
|
|
ROR.L #1,D0 ; rotate bit 0 into bit 31 <5/7/91, JPO> <T3>
|
|
BRA.B TANCONT ; continue below <T3>
|
|
|
|
TANMAIN:
|
|
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
|
|
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
|
|
FMOVE.X FP0,FP1
|
|
FMUL.D TWOBYPI,FP1 ...X*2/PI
|
|
|
|
*--HIDE THE NEXT TWO INSTRUCTIONS
|
|
lea.l PITBL+$200,a1 ...TABLE OF N*PI/2, N = -32,...,32
|
|
|
|
*--FP1 IS NOW READY
|
|
FMOVE.L FP1,D0 ...CONVERT TO INTEGER
|
|
|
|
ASL.L #4,D0
|
|
ADDA.L D0,a1 ...ADDRESS N*PIBY2 IN Y1, Y2
|
|
|
|
FSUB.X (a1)+,FP0 ...X-Y1
|
|
*--HIDE THE NEXT ONE
|
|
|
|
FSUB.S (a1),FP0 ...FP0 IS R = (X-Y1)-Y2
|
|
|
|
ROR.L #5,D0
|
|
ANDI.L #$80000000,D0 ...D0 WAS ODD IFF D0 < 0
|
|
|
|
TANCONT:
|
|
|
|
; CMPI.L #0,D0 ; DELETED (unnecessary) <5/7/91, JPO> <T3>
|
|
; BLT.B NODD ; DELETED <5/7/91, JPO> <T3>
|
|
BMI.B NODD ; <5/7/91, JPO> <T3>
|
|
|
|
FMOVE.X FP0,FP1
|
|
FMUL.X FP1,FP1 ...S = R*R
|
|
|
|
FMOVE.D TANQ4,FP3
|
|
FMOVE.D TANP3,FP2
|
|
|
|
FMUL.X FP1,FP3 ...SQ4
|
|
FMUL.X FP1,FP2 ...SP3
|
|
|
|
FADD.D TANQ3,FP3 ...Q3+SQ4
|
|
FADD.X TANP2,FP2 ...P2+SP3
|
|
|
|
FMUL.X FP1,FP3 ...S(Q3+SQ4)
|
|
FMUL.X FP1,FP2 ...S(P2+SP3)
|
|
|
|
FADD.X TANQ2,FP3 ...Q2+S(Q3+SQ4)
|
|
FADD.X TANP1,FP2 ...P1+S(P2+SP3)
|
|
|
|
FMUL.X FP1,FP3 ...S(Q2+S(Q3+SQ4))
|
|
FMUL.X FP1,FP2 ...S(P1+S(P2+SP3))
|
|
|
|
FADD.X TANQ1,FP3 ...Q1+S(Q2+S(Q3+SQ4))
|
|
FMUL.X FP0,FP2 ...RS(P1+S(P2+SP3))
|
|
|
|
FMUL.X FP3,FP1 ...S(Q1+S(Q2+S(Q3+SQ4)))
|
|
|
|
|
|
FADD.X FP2,FP0 ...R+RS(P1+S(P2+SP3))
|
|
|
|
|
|
FADD.S #"$3F800000",FP1 ...1+S(Q1+...)
|
|
|
|
FMOVE.L d1,fpcr ;restore users exceptions
|
|
FDIV.X FP1,FP0 ;last inst - possible exception set
|
|
|
|
bra t_frcinx
|
|
|
|
NODD:
|
|
FMOVE.X FP0,FP1
|
|
FMUL.X FP0,FP0 ...S = R*R
|
|
|
|
FMOVE.D TANQ4,FP3
|
|
FMOVE.D TANP3,FP2
|
|
|
|
FMUL.X FP0,FP3 ...SQ4
|
|
FMUL.X FP0,FP2 ...SP3
|
|
|
|
FADD.D TANQ3,FP3 ...Q3+SQ4
|
|
FADD.X TANP2,FP2 ...P2+SP3
|
|
|
|
FMUL.X FP0,FP3 ...S(Q3+SQ4)
|
|
FMUL.X FP0,FP2 ...S(P2+SP3)
|
|
|
|
FADD.X TANQ2,FP3 ...Q2+S(Q3+SQ4)
|
|
FADD.X TANP1,FP2 ...P1+S(P2+SP3)
|
|
|
|
FMUL.X FP0,FP3 ...S(Q2+S(Q3+SQ4))
|
|
FMUL.X FP0,FP2 ...S(P1+S(P2+SP3))
|
|
|
|
FADD.X TANQ1,FP3 ...Q1+S(Q2+S(Q3+SQ4))
|
|
FMUL.X FP1,FP2 ...RS(P1+S(P2+SP3))
|
|
|
|
FMUL.X FP3,FP0 ...S(Q1+S(Q2+S(Q3+SQ4)))
|
|
|
|
|
|
FADD.X FP2,FP1 ...R+RS(P1+S(P2+SP3))
|
|
FADD.S #"$3F800000",FP0 ...1+S(Q1+...)
|
|
|
|
|
|
FMOVE.X FP1,-(sp)
|
|
EORI.L #$80000000,(sp)
|
|
|
|
FMOVE.L d1,fpcr ;restore users exceptions
|
|
FDIV.X (sp)+,FP0 ;last inst - possible exception set
|
|
|
|
bra t_frcinx
|
|
|
|
;TANBORS: ; routine DELETED <5/7/91, JPO> <T3>
|
|
*--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
|
|
*--IF |X| < 2**(-40), RETURN X OR 1.
|
|
; CMPI.L #$3FFF8000,D0 <T3>
|
|
; BGT.B REDUCEX <T3>
|
|
|
|
TANSM:
|
|
|
|
FMOVE.X FP0,-(sp)
|
|
FMOVE.L d1,fpcr ;restore users exceptions
|
|
FMOVE.X (sp)+,FP0 ;last inst - posibble exception set
|
|
|
|
bra t_frcinx
|
|
|
|
|
|
; Routine "REDUCEX" converted to subroutine with additional code to do a <T3>
|
|
; single REMAINDER step via subtraction if abs(input) >= $7ffe 0000 ffff0000 00000000 <T3>
|
|
; <5/7/91, JPO>. <T3>
|
|
|
|
REDUCEX:
|
|
*--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
|
|
*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
|
|
*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
|
|
|
|
FMOVEM.X FP2-FP5,-(A7) ...save FP2 through FP5
|
|
MOVE.L D2,-(A7)
|
|
FMOVE.S #"$00000000",FP1 ; initial argument in FP0/FP1
|
|
|
|
; If compact form of abs(arg) in D0 = $7ffeffff, argument is so large <T3> thru next <T3>
|
|
; that there is danger of unwanted overflow in first loop iteration.
|
|
; In this case, reduce argument by one remainder step to make subsequent
|
|
; reduction safe <5/7/91, JPO>.
|
|
|
|
cmpi.l #$7ffeffff,d0 ; is argument dangerously large? <5/7/91, JPO>
|
|
bne.b LOOP ; no. <5/7/91, JPO>
|
|
|
|
move.l #$7ffe0000,FP_SCR2(a6) ; yes. create 2**16383*PI/2 at FP_SCR2 <5/7/91, JPO>
|
|
move.l #$c90fdaa2,FP_SCR2+4(a6)
|
|
clr.l FP_SCR2+8(a6)
|
|
ftest.x fp0 ; test sign of argument <5/7/91, JPO>
|
|
move.l #$7fdc0000,FP_SCR3(a6) ; create low half of 2**16383*PI/2
|
|
move.l #$85a308d3,FP_SCR3+4(a6) ; at FP_SCR3 <5/7/91, JPO>
|
|
clr.l FP_SCR3+8(a6)
|
|
fblt.w @1 ; negative arg <5/7/91, JPO>
|
|
|
|
or.w #$8000,FP_SCR2(a6) ; positive arg. negate FP_SCR2
|
|
or.w #$8000,FP_SCR3(a6) ; and FP_SCR3 <5/7/91, JPO>
|
|
@1:
|
|
fadd.x FP_SCR2(a6),fp0 ; high part of reduction <5/7/91, JPO>
|
|
fmove.x fp0,fp1 ; save high result in fp1 <5/7/91, JPO>
|
|
fadd.x FP_SCR3(a6),fp0 ; low part of reduction <5/7/91, JPO>
|
|
fsub.x fp0,fp1 ; determine tail of result <5/7/91, JPO>
|
|
fadd.x FP_SCR3(a6),fp1 ; fp0/fp1 are reduced argument <5/7/91, JPO> <T3>
|
|
|
|
*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
|
|
*--integer quotient will be stored in N
|
|
*--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
|
|
|
|
; On entry, D0 contains compact form of abs(input). Use this value
|
|
; to determine if argument is so large (D0 = $7ffeffff) that there
|
|
; is a danger of spurious overflow in reduction calculation. If so,
|
|
; reduce argument by one remainder step by subtracting 2**16383*PI/2
|
|
; from the argument and continuing the reduction normally.
|
|
|
|
LOOP:
|
|
FMOVE.X FP0,INARG(a6) ...+-2**K * F, 1 <= F < 2
|
|
MOVE.W INARG(a6),D0
|
|
MOVE.L D0,A1 ...save a copy of D0
|
|
ANDI.L #$00007FFF,D0
|
|
SUBI.L #$00003FFF,D0 ...D0 IS K
|
|
CMPI.L #28,D0
|
|
BLE.B LASTLOOP
|
|
;CONTLOOP: ; label not referenced <1/7/91, JPO>
|
|
; SUBI.L #28,D0 ...D0 IS L := K-28 - DELETED <6/13/91, JPO> <T4>
|
|
SUBI.L #27,D0 ...D0 IS L := K-27 - ADDED <6/13/91, JPO> <T4>
|
|
MOVE.L #0,ENDFLAG(a6)
|
|
BRA.B WORK
|
|
LASTLOOP:
|
|
CLR.L D0 ...D0 IS L := 0
|
|
MOVE.L #1,ENDFLAG(a6)
|
|
|
|
WORK:
|
|
*--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
|
|
*--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
|
|
|
|
*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
|
|
*--2**L * (PIby2_1), 2**L * (PIby2_2)
|
|
|
|
MOVE.L #$00003FFE,D2 ...BIASED EXPO OF 2/PI
|
|
SUB.L D0,D2 ...BIASED EXPO OF 2**(-L)*(2/PI)
|
|
|
|
MOVE.L #$A2F9836E,FP_SCR1+4(a6)
|
|
MOVE.L #$4E44152A,FP_SCR1+8(a6)
|
|
MOVE.W D2,FP_SCR1(a6) ...FP_SCR1 is 2**(-L)*(2/PI)
|
|
|
|
FMOVE.X FP0,FP2
|
|
FMUL.X FP_SCR1(a6),FP2
|
|
*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
|
|
*--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
|
|
*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
|
|
*--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
|
|
*--US THE DESIRED VALUE IN FLOATING POINT.
|
|
|
|
*--HIDE SIX CYCLES OF INSTRUCTION
|
|
MOVE.L A1,D2
|
|
SWAP D2
|
|
ANDI.L #$80000000,D2
|
|
ORI.L #$5F000000,D2 ...D2 IS SIGN(INARG)*2**63 IN SGL
|
|
MOVE.L D2,TWOTO63(a6)
|
|
|
|
MOVE.L D0,D2
|
|
ADDI.L #$00003FFF,D2 ...BIASED EXPO OF 2**L * (PI/2)
|
|
|
|
*--FP2 IS READY
|
|
FADD.S TWOTO63(a6),FP2 ...THE FRACTIONAL PART OF FP1 IS ROUNDED
|
|
|
|
*--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
|
|
MOVE.W D2,FP_SCR2(a6)
|
|
CLR.W FP_SCR2+2(a6)
|
|
MOVE.L #$C90FDAA2,FP_SCR2+4(a6)
|
|
CLR.L FP_SCR2+8(a6) ...FP_SCR2 is 2**(L) * Piby2_1
|
|
|
|
*--FP2 IS READY
|
|
FSUB.S TWOTO63(a6),FP2 ...FP2 is N
|
|
|
|
ADDI.L #$00003FDD,D0
|
|
MOVE.W D0,FP_SCR3(a6)
|
|
CLR.W FP_SCR3+2(a6)
|
|
MOVE.L #$85A308D3,FP_SCR3+4(a6)
|
|
CLR.L FP_SCR3+8(a6) ...FP_SCR3 is 2**(L) * Piby2_2
|
|
|
|
MOVE.L ENDFLAG(a6),D0
|
|
|
|
*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
|
|
*--P2 = 2**(L) * Piby2_2
|
|
FMOVE.X FP2,FP4
|
|
FMul.X FP_SCR2(a6),FP4 ...W = N*P1
|
|
FMove.X FP2,FP5
|
|
FMul.X FP_SCR3(a6),FP5 ...w = N*P2
|
|
FMove.X FP4,FP3
|
|
*--we want P+p = W+w but |p| <= half ulp of P
|
|
*--Then, we need to compute A := R-P and a := r-p
|
|
FAdd.X FP5,FP3 ...FP3 is P
|
|
FSub.X FP3,FP4 ...W-P
|
|
|
|
FSub.X FP3,FP0 ...FP0 is A := R - P
|
|
FAdd.X FP5,FP4 ...FP4 is p = (W-P)+w
|
|
|
|
FMove.X FP0,FP3 ...FP3 A
|
|
FSub.X FP4,FP1 ...FP1 is a := r - p
|
|
|
|
*--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
|
|
*--|r| <= half ulp of R.
|
|
FAdd.X FP1,FP0 ...FP0 is R := A+a
|
|
*--No need to calculate r if this is the last loop
|
|
; CMPI.L #0,D0 ; DELETED <5/7/91, JPO> <T3>
|
|
TST.L D0 ; <5/7/91, JPO> <T3>
|
|
BGT.B TRESTORE ; label renamed <1/7/91, JPO>
|
|
|
|
*--Need to calculate r
|
|
FSub.X FP0,FP3 ...A-R
|
|
FAdd.X FP3,FP1 ...FP1 is r := (A-R)+a
|
|
BRA.W LOOP
|
|
|
|
TRESTORE: ; label renamed <1/7/91, JPO>
|
|
; FMOVE.L FP2,NTAN(a6) ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
FMOVE.L FP2,NTRIG(a6) ; variable RENAMED <5/7/91, JPO> <T3>
|
|
MOVE.L (A7)+,D2
|
|
FMOVEM.X (A7)+,FP2-FP5
|
|
|
|
|
|
; MOVE.L NTAN(a6),D0 ; <1/7/91, JPO> - DELETED <5/7/91, JPO> <T3>
|
|
; ROR.L #1,D0 ; DELETED from subroutine <5/7/91, JPO> <T3>
|
|
|
|
|
|
; BRA.W TANCONT ; DELETED from subroutine <5/7/91, JPO> <T3>
|
|
|
|
RTS ; return from subroutine <5/7/91, JPO> <T3>
|
|
|
|
|