; ; File: InvTrig.a ; ; Contains: Routines to emulate inverse trig functions ; ; Originally Written by: Motorola Inc. ; Adapted to Apple/MPW: Jon Okada ; ; Copyright: © 1990, 1991 by Apple Computer, Inc., all rights reserved. ; ; This file is used in these builds: Mac32 ; ; Change History (most recent first): ; ; <2> 3/30/91 BG Rolling in Jon Okada's latest changes. ; <1> 12/14/90 BG First checked into TERROR/BBS. ; invtrig.a ; Based upon Motorola files 'sacos.sa', 'sasin.sa', and 'satan.sa'. ; sacos ; CHANGE LOG ; 04 Jan 91 JPO Moved constants PI and PIBY2 to file 'constants.a'. ; * * sacos.sa 3.1 12/10/90 * * Description: The entry point sAcos computes the inverse cosine of * an input argument; sAcosd does the same except for denormalized * input. * * Input: Double-extended number X in location pointed to * by address register a0. * * Output: The value arccos(X) returned in floating-point register Fp0. * * Accuracy and Monotonicity: The returned result is within 3 ulps in * 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the * result is subsequently rounded to double precision. The * result is provably monotonic in double precision. * * Speed: The program sCOS takes approximately 310 cycles. * * Algorithm: * * ACOS * 1. If |X| >= 1, go to 3. * * 2. (|X| < 1) Calculate acos(X) by * z := (1-X) / (1+X) * acos(X) = 2 * atan( sqrt(z) ). * Exit. * * 3. If |X| > 1, go to 5. * * 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. * * 5. (|X| > 1) Generate an invalid operation by 0 * infinity. * Exit. * * Copyright (C) Motorola, Inc. 1990 * All Rights Reserved * * THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA * The copyright notice above does not evidence any * actual or intended publication of such source code. * SACOS IDNT 2,1 Motorola 040 Floating Point Software Package sacosd: *--ACOS(X) = PI/2 FOR DENORMALIZED X fmove.l d1,fpcr ;...load user's rounding mode/precision FMOVE.X PIBY2,FP0 bra t_frcinx sacos: FMOVE.X (a0),FP0 ;...LOAD INPUT move.l (a0),d0 ;...pack exponent with upper 16 fraction move.w 4(a0),d0 ANDI.L #$7FFFFFFF,D0 CMPI.L #$3FFF8000,D0 BGE.B ACOSBIG *--THIS IS THE USUAL CASE, |X| < 1 *--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) ) FMOVE.S #"$3F800000",FP1 FADD.X FP0,FP1 ;...1+X FNEG.X FP0 ;... -X FADD.S #"$3F800000",FP0 ;...1-X FDIV.X FP1,FP0 ;...(1-X)/(1+X) FSQRT.X FP0 ;...SQRT((1-X)/(1+X)) fmovem.x fp0,(a0) ;...overwrite input move.l d1,-(sp) ;save original users fpcr clr.l d1 bsr satan ...ATAN(SQRT([1-X]/[1+X])) fMOVE.L (sp)+,fpcr ;restore users exceptions FADD.X FP0,FP0 ...2 * ATAN( STUFF ) bra t_frcinx ACOSBIG: FABS.X FP0 FCMP.S #"$3F800000",FP0 fbgt t_operr ;cause an operr exception *--|X| = 1, ACOS(X) = 0 OR PI move.l (a0),d0 ;...pack exponent with upper 16 fraction move.w 4(a0),d0 CMP.L #0,D0 ;D0 has original exponent+fraction BGT.B ACOSP1 *--X = -1 *Returns PI and inexact exception FMOVE.X PI,FP0 FMOVE.L d1,FPCR FADD.S #"$00800000",FP0 ;cause an inexact exception to be put * ;into the 040 - will not trap until next * ;fp inst. bra t_frcinx ACOSP1: FMOVE.L d1,FPCR FMOVE.S #"$00000000",FP0 rts ;Facos of +1 is exact ; sasin ; CHANGE LOG: ; 04 Jan 91 JPO Deleted constant PIBY2 (already exists in file 'constants.a'). ; * * sasin.sa 3.1 12/10/90 * * Description: The entry point sAsin computes the inverse sine of * an input argument; sAsind does the same except for denormalized * input. * * Input: Double-extended number X in location pointed to * by address register a0. * * Output: The value arcsin(X) returned in floating-point register Fp0. * * Accuracy and Monotonicity: The returned result is within 3 ulps in * 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the * result is subsequently rounded to double precision. The * result is provably monotonic in double precision. * * Speed: The program sASIN takes approximately 310 cycles. * * Algorithm: * * ASIN * 1. If |X| >= 1, go to 3. * * 2. (|X| < 1) Calculate asin(X) by * z := sqrt( [1-X][1+X] ) * asin(X) = atan( x / z ). * Exit. * * 3. If |X| > 1, go to 5. * * 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit. * * 5. (|X| > 1) Generate an invalid operation by 0 * infinity. * Exit. * * Copyright (C) Motorola, Inc. 1990 * All Rights Reserved * * THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA * The copyright notice above does not evidence any * actual or intended publication of such source code. * SASIN IDNT 2,1 Motorola 040 Floating Point Software Package sasind: *--ASIN(X) = X FOR DENORMALIZED X bra t_extdnrm sasin: FMOVE.X (a0),FP0 ;...LOAD INPUT move.l (a0),d0 move.w 4(a0),d0 ANDI.L #$7FFFFFFF,D0 CMPI.L #$3FFF8000,D0 BGE.B asinbig *--THIS IS THE USUAL CASE, |X| < 1 *--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) ) FMOVE.S #"$3F800000",FP1 FSUB.X FP0,FP1 ;...1-X fmovem.x fp2,-(a7) FMOVE.S #"$3F800000",FP2 FADD.X FP0,FP2 ;...1+X FMUL.X FP2,FP1 ;...(1+X)(1-X) fmovem.x (a7)+,fp2 FSQRT.X FP1 ;...SQRT([1-X][1+X]) FDIV.X FP1,FP0 ;...X/SQRT([1-X][1+X]) fmovem.x fp0,(a0) bsr.b satan bra t_frcinx asinbig: FABS.X FP0 ...|X| FCMP.S #"$3F800000",FP0 fbgt t_operr ;cause an operr exception *--|X| = 1, ASIN(X) = +- PI/2. FMOVE.X PIBY2,FP0 move.l (a0),d0 ANDI.L #$80000000,D0 ;...SIGN BIT OF X ORI.L #$3F800000,D0 ;...+-1 IN SGL FORMAT MOVE.L D0,-(sp) ;...push SIGN(X) IN SGL-FMT FMOVE.L d1,FPCR FMUL.S (sp)+,FP0 bra t_frcinx ; satan ; CHANGE LOG: ; 04 Jan 91 JPO Deleted constants BOUNDS1 and ONE because they are not used. ; Deleted constants PPIBY2, NPIBY2, and NTINY, changing code ; to refer to constants PIBY2, MPIBY2 (in file 'dofunc.a'), ; and PTINY, respectively. Moved constants ATANA1-ATANA3, ; ATANB1-ATANB6, ATANC1-ATANC5, PTINY, and table ATANTBL to ; file 'constants.a'. Changed names of local variables X, ; XDCARE, XFRAC, and XFRACLO to XATAN, XATANDC, XATANF, and ; XATANFL, respectively. ; * * satan.sa 3.1 12/10/90 * * The entry point satan computes the arctagent of an * input value. satand does the same except the input value is a * denormalized number. * * Input: Double-extended value in memory location pointed to by address * register a0. * * Output: Arctan(X) returned in floating-point register Fp0. * * Accuracy and Monotonicity: The returned result is within 2 ulps in * 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the * result is subsequently rounded to double precision. The * result is provably monotonic in double precision. * * Speed: The program satan takes approximately 160 cycles for input * argument X such that 1/16 < |X| < 16. For the other arguments, * the program will run no worse than 10% slower. * * Algorithm: * Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. * * Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3. * Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits * of X with a bit-1 attached at the 6-th bit position. Define u * to be u = (X-F) / (1 + X*F). * * Step 3. Approximate arctan(u) by a polynomial poly. * * Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values * calculated beforehand. Exit. * * Step 5. If |X| >= 16, go to Step 7. * * Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. * * Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'. * Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. * * Copyright (C) Motorola, Inc. 1990 * All Rights Reserved * * THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA * The copyright notice above does not evidence any * actual or intended publication of such source code. * satan IDNT 2,1 Motorola 040 Floating Point Software Package ;X equ FP_SCR1 ; deleted <1/4/91, JPO> ;XDCARE equ X+2 ;XFRAC equ X+4 ;XFRACLO equ X+8 XATAN equ FP_SCR1 ; <1/4/91, JPO> XATANDC equ XATAN+2 XATANF equ XATAN+4 XATANFL equ XATAN+8 ATANF equ FP_SCR2 ATANFHI equ ATANF+4 ATANFLO equ ATANF+8 satand: *--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT bra t_extdnrm satan: *--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S FMOVE.X (A0),FP0 ;...LOAD INPUT MOVE.L (A0),D0 MOVE.W 4(A0),D0 FMOVE.X FP0,XATAN(a6) ; <1/4/91, JPO> ANDI.L #$7FFFFFFF,D0 CMPI.L #$3FFB8000,D0 ;...|X| >= 1/16? BGE.B ATANOK1 BRA.W ATANSM ATANOK1: CMPI.L #$4002FFFF,D0 ;...|X| < 16 ? BLE.B ATANMAIN BRA.W ATANBIG *--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE *--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). *--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN *--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE *--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS *--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR *--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO *--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE *--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL *--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE *--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION *--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION *--WILL INVOLVE A VERY LONG POLYNOMIAL. *--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS *--WE CHOSE F TO BE +-2^K * 1.BBBB1 *--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE *--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE *--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS *-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). ATANMAIN: MOVE.W #$0000,XATANDC(a6) ;...CLEAN UP X JUST IN CASE <1/4/91, JPO> ANDI.L #$F8000000,XATANF(a6) ;...FIRST 5 BITS <1/4/91, JPO> ORI.L #$04000000,XATANF(a6) ;...SET 6-TH BIT TO 1 <1/4/91, JPO> MOVE.L #$00000000,XATANFL(a6) ;...LOCATION OF X IS NOW F <1/4/91, JPO> FMOVE.X FP0,FP1 ;...FP1 IS X FMUL.X XATAN(a6),FP1 ;...FP1 IS X*F, NOTE THAT X*F > 0 <1/4/91, JPO> FSUB.X XATAN(a6),FP0 ;...FP0 IS X-F <1/4/91, JPO> FADD.S #"$3F800000",FP1 ...FP1 IS 1 + X*F FDIV.X FP1,FP0 ;...FP0 IS U = (X-F)/(1+X*F) *--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|) *--CREATE ATAN(F) AND STORE IT IN ATANF, AND *--SAVE REGISTERS FP2. MOVE.L d2,-(a7) ;...SAVE d2 TEMPORARILY MOVE.L d0,d2 ;...THE EXPO AND 16 BITS OF X ANDI.L #$00007800,d0 ;...4 VARYING BITS OF F'S FRACTION ANDI.L #$7FFF0000,d2 ;...EXPONENT OF F SUBI.L #$3FFB0000,d2 ;...K+4 ASR.L #1,d2 ADD.L d2,d0 ;...THE 7 BITS IDENTIFYING F ASR.L #7,d0 ;...INDEX INTO TBL OF ATAN(|F|) LEA ATANTBL,a1 ADDA.L d0,a1 ;...ADDRESS OF ATAN(|F|) MOVE.L (a1)+,ATANF(a6) MOVE.L (a1)+,ATANFHI(a6) MOVE.L (a1)+,ATANFLO(a6) ;...ATANF IS NOW ATAN(|F|) MOVE.L XATAN(a6),d0 ;...LOAD SIGN AND EXPO. AGAIN <1/4/91, JPO> ANDI.L #$80000000,d0 ;...SIGN(F) OR.L d0,ATANF(a6) ;...ATANF IS NOW SIGN(F)*ATAN(|F|) MOVE.L (a7)+,d2 ;...RESTORE d2 *--THAT'S ALL I HAVE TO DO FOR NOW, *--BUT ALAS, THE DIVIDE IS STILL CRANKING! *--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS *--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U *--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. *--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) *--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. *--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT *--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED FMOVE.X FP0,FP1 FMUL.X FP1,FP1 FMOVE.D ATANA3,FP2 FADD.X FP1,FP2 ;...A3+V FMUL.X FP1,FP2 ;...V*(A3+V) FMUL.X FP0,FP1 ;...U*V FADD.D ATANA2,FP2 ;...A2+V*(A3+V) FMUL.D ATANA1,FP1 ;...A1*U*V FMUL.X FP2,FP1 ;...A1*U*V*(A2+V*(A3+V)) FADD.X FP1,FP0 ;...ATAN(U), FP1 RELEASED FMOVE.L d1,FPCR ;restore users exceptions FADD.X ATANF(a6),FP0 ;...ATAN(X) bra t_frcinx ATANBORS: *--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED. *--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. CMPI.L #$3FFF8000,d0 BGT.W ATANBIG ...I.E. |X| >= 16 ATANSM: *--|X| <= 1/16 *--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE *--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) *--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] ) *--WHERE Y = X*X, AND Z = Y*Y. CMPI.L #$3FD78000,d0 BLT.B ATANTINY *--COMPUTE POLYNOMIAL FMUL.X FP0,FP0 ...FP0 IS Y = X*X MOVE.W #$0000,XATANDC(a6) ; <1/4/91, JPO> FMOVE.X FP0,FP1 FMUL.X FP1,FP1 ;...FP1 IS Z = Y*Y FMOVE.D ATANB6,FP2 FMOVE.D ATANB5,FP3 FMUL.X FP1,FP2 ;...Z*B6 FMUL.X FP1,FP3 ;...Z*B5 FADD.D ATANB4,FP2 ;...B4+Z*B6 FADD.D ATANB3,FP3 ;...B3+Z*B5 FMUL.X FP1,FP2 ;...Z*(B4+Z*B6) FMUL.X FP3,FP1 ;...Z*(B3+Z*B5) FADD.D ATANB2,FP2 ;...B2+Z*(B4+Z*B6) FADD.D ATANB1,FP1 ;...B1+Z*(B3+Z*B5) FMUL.X FP0,FP2 ;...Y*(B2+Z*(B4+Z*B6)) FMUL.X XATAN(a6),FP0 ;...X*Y <1/4/91, JPO> FADD.X FP2,FP1 ;...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] FMUL.X FP1,FP0 ;...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) FMOVE.L d1,FPCR ;restore users exceptions FADD.X XATAN(a6),FP0 ; <1/4/91, JPO> bra t_frcinx ATANTINY: *--|X| < 2^(-40), ATAN(X) = X MOVE.W #$0000,XATANDC(a6) ; <1/4/91, JPO> FMOVE.L d1,FPCR ;restore users exceptions FMOVE.X XATAN(a6),FP0 ;last inst - possible exception set <1/4/91, JPO> bra t_frcinx ATANBIG: *--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE, *--RETURN SIGN(X)*PI/2 + ATAN(-1/X). CMPI.L #$40638000,d0 BGT.W ATANHUGE *--APPROXIMATE ATAN(-1/X) BY *--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' *--THIS CAN BE RE-WRITTEN AS *--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. FMOVE.S #"$BF800000",FP1 ;...LOAD -1 FDIV.X FP0,FP1 ;...FP1 IS -1/X *--DIVIDE IS STILL CRANKING FMOVE.X FP1,FP0 ;...FP0 IS X' FMUL.X FP0,FP0 ;...FP0 IS Y = X'*X' FMOVE.X FP1,XATAN(a6) ;...X IS REALLY X' <1/4/91, JPO> FMOVE.X FP0,FP1 FMUL.X FP1,FP1 ;...FP1 IS Z = Y*Y FMOVE.D ATANC5,FP3 FMOVE.D ATANC4,FP2 FMUL.X FP1,FP3 ;...Z*C5 FMUL.X FP1,FP2 ;...Z*B4 FADD.D ATANC3,FP3 ;...C3+Z*C5 FADD.D ATANC2,FP2 ;...C2+Z*C4 FMUL.X FP3,FP1 ;...Z*(C3+Z*C5), FP3 RELEASED FMUL.X FP0,FP2 ;...Y*(C2+Z*C4) FADD.D ATANC1,FP1 ;...C1+Z*(C3+Z*C5) FMUL.X XATAN(a6),FP0 ;...X'*Y <1/4/91, JPO> FADD.X FP2,FP1 ;...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] FMUL.X FP1,FP0 ;...X'*Y*([B1+Z*(B3+Z*B5)] * ;... +[Y*(B2+Z*(B4+Z*B6))]) FADD.X XATAN(a6),FP0 ; <1/4/91, JPO> FMOVE.L d1,FPCR ;restore users exceptions btst.b #7,(a0) beq.b pos_big neg_big: ; FADD.X NPIBY2,FP0 ; deleted <1/4/91, JPO> FADD.X MPIBY2,FP0 ; <1/4/91, JPO> bra t_frcinx pos_big: ; FADD.X PPIBY2,FP0 ; deleted <1/4/91, JPO> FADD.X PIBY2,FP0 ; <1/4/91, JPO> bra t_frcinx ATANHUGE: *--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY btst.b #7,(a0) beq.b pos_huge neg_huge: ; FMOVE.X NPIBY2,fp0 ; deleted <1/4/91, JPO> FMOVE.X MPIBY2,fp0 ; <1/4/91, JPO> fmove.l d1,fpcr ; fsub.x NTINY,fp0 ; deleted <1/4/91, JPO> fadd.x PTINY,fp0 ; <1/4/91, JPO> bra t_frcinx pos_huge: ; FMOVE.X PPIBY2,fp0 ; deleted <1/4/91, JPO> FMOVE.X PIBY2,fp0 ; <1/4/91, JPO> fmove.l d1,fpcr fsub.x PTINY,fp0 bra t_frcinx