; ; File: HWElemsArcTg.a ; ; Contains: HW ATAN() routines ; ; Written by: Apple Numerics Group, DSG ; ; Copyright: © 1985-1993 by Apple Computer, Inc., all rights reserved. ; ; Change History (most recent first): ; ; 2/3/93 CSS Update from Horror: ;

9/29/92 BG Adding Jon Okada's latest fixes. ; <1> 10/24/91 SAM/KSM Rolled in Regatta file. ; ; Terror Change History: ; ; <1> 01/06/91 BG Added to TERROR/BBS for the time. ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; ;; File 881ELEMStg.a, ;; ;; Function arctg(x), ;; ;; Implementation of tg for ½SANE. ;; ;; For machines with MC68881/68882 FPU. ;; ;; ;; ;; Copyright Apple Computer, Inc. 1985-7, 1989, 1990, 1992, 1993. ;; ;; ;; ;; Written by Ali Sazegari, started on August 11 1989, ;; ;; Added sections to handle flags correctly, December 14, 1989. ;; ;; Funny behavior on the input -inf: i can't get these flags to print with ;; ;; my fortran test program. ;; ;; A problem was found using the arctg with pascal and c compilers: ;; ;; the instruction MOVEM.W A1/A2,-(SP) caused a bus error. The reason ;; ;; is that on the restore side ( 3 occurences in this code ), MOVEM.W the ;; ;; result is sign extended to 32 bits ( akh! ) therefore garbeling both ;; ;; registers. MOVEM.W replaced by MOVEM.L, May 21, 1990. ;; ;; Grouped the A1-A3 save and restore together, May 22, 1990 ;; ;; ;; ;; N.B. This program is a hog, it uses the following registers: ;; ;; a0, d0, d1, fp0, fp1, fp2, fp3, fp4 & fp5. ;; ;; ;; ;; based on Elems881, package code for Macintosh by J. Coonen, ;; ;; who wrote it from pascal programs by D. Hough, C. McMaster and K. Hanson. ;; ;; ;; ;; Further Modification History: ;; ;; 23 Apr 92 Performance tuning done (better register usage, faster FPU ;; ;; ops used, front-end check for small/huge operands eliminates ;; ;; spurious underflowing. ... JPO ;; ;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RAtan ; MOVEM.L A1/A2/A3,-(SP) ; a1 = sgn(T), where T is the argument DELETED <4/23/92, JPO> ; a2 is a boolean, cleared if argument ; is less than one, set otherwise ; use a3 for denormal flag use ; MOVEA.L A0,A3 ; keep a copy of the pointer to argument DELETED <4/23/92, JPO> ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Classification of the argument with a neat trick. ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; FMOVE.L FPSR,D0 ; what kind of arg? fpsr has the key MOVEQ #0,D1 ; clr sign/size flag <4/23/92, JPO> ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Here is the strategy ( commonly known as the clay scoop ) by C. R. Lewis ;; ;; Register D0 has N, Z, I, NaN burried in it. Mask off unwanted bits to ;; ;; get them. Then, left shift NZINaN to have Z at MSB position. If D0 = 0 ;; ;; then arg is finite. ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; MOVE.L #ExcMask,D1 ; mask it kid DELETED <4/23/92, JPO> ; AND.L D1,D0 ; we now have the meat DELETED<4/23/92, JPO> ANDI.L #ExcMask,D0 ; isolate FPCC bits <4/23/92, JPO> LSL.L #ShiftIt,D0 ; N->C, Z->MSB, I->30th & NaN->29th bit BNE.W StinkyBoys ; zero, infinite or NaN input <4/23/92, JPO> ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; If arg is negative then set bit 31 of D1 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; BCS.B @70 ; T < 0 ? DELETED <4/23/92, JPO> ; MOVEA.W #0,A1 ; no T > 0, then clear the flag DELETED <4/23/92, JPO> ; BRA.B @71 ; DELETED <4/23/92, JPO> ;@70 label DELETED <4/23/92, JPO> ; MOVEA.W #1,A1 ; yes T < 0, then set the flag DELETED <4/23/92, JPO> ;@71 ; label DELETED <4/23/92, JPO> BCC.B @smchk ; input > 0 and so is flag D1 <4/23/92, JPO> BSET.L #31,D1 ; input < 0 and so is flag D1 <4/23/92, JPO> @smchk: ; label ADDED <4/23/92, JPO> BFEXTU (A0){1:15},D0 ; |arg| < 2^(-32)? <4/23/92, JPO> CMPI.W #$3FDF,D0 ; <4/23/92, JPO> BGE.B @bigchk ; no, check for big input <4/23/92, JPO> MOVE.L #SetInexact,D0 ; yes, return arg and INEXACT <4/23/92, JPO> BFTST (A0){1:16} ; if denormal, signal UNDERFLOW <4/23/92, JPO> BNE.B @done ; <4/23/92, JPO> OR.L #SetUFlow,D0 ; <4/23/92, JPO> @done: ; label ADDED <4/23/92, JPO> JMP (A4) ; <4/23/92, JPO> @bigchk: CMPI.W #$407F,D0 ; is unbiased exponent > 128? <4/23/92, JPO> BLE.B @atando ; no, evaluate atan <4/23/92, JPO> FMOVE.X FPKPI2,FP0 ; yes, return ¹/2 with arg sign <4/23/92, JPO> TST.L D1 ; sign of D1 is sign of arg <4/23/92, JPO> BPL.B @bigdone ; <4/23/92, JPO> FNEG.X FP0,FP0 ; arg < 0.0 <4/23/92, JPO> @bigdone: ; label added <4/23/92, JPO> MOVE.L #SetInexact,D0 ; signal INEXACT <4/23/92, JPO> JMP (A4) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; If arg is a non zero finite number, great! let's fall through, otherwise ;; ;; take the problem cases. ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; BNE.W StinkyBoys ; take care of the bad guys (Z bar set) DELETED <4/23/92, JPO> ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Go for the main course. Persian style! ;; ;; -- throughout this program, we use 'arctg' & 'atan' interchangeably. ;; ;; ;; ;; About the argument reduction: ATAN(T) is evaluated for 0 <= T <= 1 with ;; ;; either a Short or a Long formula depending on the value of T. ;; ;; ;; ;; Recall the identities: ;; ;; ;; ;; atan(-T) = -atan(T) ;; ;; atan(1/T) = pi/2 - atan(T) ;; ;; ;; ;; If T < 0 then atan(-T) is computed, and the result negated. ;; ;; If |T| > 1 then atan(1/|T|) is computed and the result subtracted from ¹/2;; ;; ;; ;; To compute atan of reduced T use Short or Long formula: ;; ;; ;; ;; T <= ATnCons Å 0.267 then arctg(T) = T - T * P(T*T) / Q(T*T), ;; ;; else arctg(T) = T - (A + (B*P(B*B)/Q(B*B) + x2fx2)) ;; ;; ;; ;; where x2 and x2fx2 are constants, about 0.5 and 0.05 and ;; ;; A = (T - x2)/(1 + (1/(T*x2))), ;; ;; B = (T - x2)/(1 + (T*x2)). ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @atando: ; label ADDED <4/23/92, JPO> FABS.X FP0,FP0 ; set up for comparison ; FCMP.W #1,FP0 ; DELETED <4/23/92, JPO> FCMP.S #"$3F800000",FP0 ; T = |arg| <= 1? <4/23/92, JPO> FBLE.W TisLTone ; is T <= 1 ? ; MOVEA.W #1,A2 ; not, T > 1, set the flag a2 DELETED <4/23/92, JPO> ; FMOVE.W #1,FP3 ; and invert the T so that 1/T < 1 DELETED <4/23/92, JPO> ADDQ.B #1,D1 ; set inversion flag (D1.B) <4/23/92, JPO> FMOVE.S #"$3F800000",FP3 ; FP3 <- 1.0 <4/23/92, JPO> FDIV.X FP0,FP3 ; fp0 = T, fp3 = 1/T FMOVE.X FP3,FP0 ; fp0 = 1/T, fp3 = 1/T < 1 BRA.B GoDoIt ; now continue with the computation TisLTone: ; MOVEA.W #0,A2 ; yes, T <= 1, clear the flag a2 DELETED <4/23/92, JPO> FMOVE.X FP0,FP3 ; duplicate argument: fp0 = fp3 = T <= 1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Select short or long form of the formula based on reduced argument versus ;; ;; ATnCons, about 0.268. ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; GoDoIt FCMP.X FPKATNCONS,FP0 ; start the selection process FBGT.W LongForm ; do we use the long formula? ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Short formula is selected, form T^2 and evaluate T * ( P(T^2) / Q(T^2) ) ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; FMUL.X FP0,FP0 ; fp0 = T*T, fp3 = T LEA ATANP,A0 ; A0 = coeff table ptr for PolyEval BSR PolyEval ; fp1 = PolyEval(P, T*T) FMOVE.X FP1,FP2 ; save P(arg) LEA ATANQ,A0 ; A0 = coeff table ptr for PolyEval BSR PolyEval ; fp1 = PolyEval(Q, T*T) FDIV.X FP1,FP2 ; form fp2 = P(T*T)/Q(T*T) FMUL.X FP3,FP2 ; form fp2 = T * P(T*T)/Q(T*T) FSUB.X FP2,FP3 ; form fp3 = T - T * P(T*T)/Q(T*T) FMOVE.X FP3,FP0 ; get the result in the right place GetOut: ; MOVE.W A2,D1 ; activate the boolean flag DELETED <4/23/92, JPO> TST.B D1 ; test inversion flag <4/23/92, JPO> BEQ.B @72 ; is T <= 1 ? ( the Z bar bit set ) FMOVE.X FPKPI2,FP1 ; get ¹/2 FSUB.X FP0,FP1 ; arctg(1/T) = ¹/2 - arctg(T) FMOVE.X FP1,FP0 ; put it in the right place old boy! @72: ; MOVE.W A1,D1 ; activate the zero flag DELETED <4/23/92, JPO> ; BEQ.B @73 ; is T negative? ( the Z bar bit set ) DELETED <4/23/92, JPO> ; FMUL.B #-1,FP0 ; T was negative then return -arctg(T) DELETED <4/23/92, JPO> TST.L D1 ; was arg negative? <4/23/92, JPO> BPL.B @73 ; no <4/23/92, JPO> FNEG.X FP0,FP0 ; yes, negate result <4/23/92, JPO> @73: ; BFEXTU (A3) {1:16},D0 ; get the exponent & 1st mantissa bit DELETED <4/23/92, JPO> ; MOVEM.L (SP)+,A1/A2/A3 ; restore sign, boolean & flag registers DELETED <4/23/92, JPO> ; BEQ.B @74 ; if D0 = 0 then current s = denorm DELETED <4/23/92, JPO> MOVE.L #SetInexact,D0 ; set the inexact flag ; MOVE.B #ClrUflow ,D0 ; and clear the underflow bit DELETED <4/23/92, JPO> JMP (A4) ;@74 MOVE.L #SetInexact,D0 ; set up the inexact flag DELETED <4/23/92, JPO> ; JMP (A4) ; go to the control routine DELETED <4/23/92, JPO> ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Long formula is selected, proceed with caution, difficult terrain ahead! ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; LongForm FMOVEM FP4/FP5,-(SP) ; ay ya yay! save two more registers! FMUL.X FPKX2,FP3 ; form fp3 = T * x2 ; FMOVE.W #1,FP2 ; get ready for the inverse DELETED <4/23/92, JPO> FMOVE.S #"$3F800000",FP2 ; FP2 <- 1.0 <4/23/92, JPO> FDIV.X FP3,FP2 ; fp0 = T, fp2 = 1 / (T*x2), fp3 = T*x2 ; FADD.W #1,FP2 ; fp2 = 1 + 1 / (T*x2) DELETED <4/23/92, JPO> ; FADD.W #1,FP3 ; fp3 = 1 + ( T*x2 ) DELETED <4/23/92, JPO> FADD.S #"$3F800000",FP3 ; FP3 <- 1.0 + (T*x2) <4/23/92, JPO> FADD.S #"$3F800000",FP2 ; FP2 <- 1.0 + 1.0/(T*x2) <4/23/92, JPO> FMOVE.X FP0,FP1 ; copy T, fp0 = fp1 = T FSUB.X FPKX2,FP1 ; form fp1 = T - x2 FMOVE.X FP1,FP4 ; copy for the next move, fp4=fp1= T - x2 FDIV.X FP3,FP1 ; we have fp1 = B = (T-x2)/(1+(1+(T*x2)) FDIV.X FP2,FP4 ; and fp4 = A = (T-x2)/(1 + 1/(T*x2)) FMOVE.X FP1,FP3 ; fp0=T, fp1=B, fp2=junk, fp3=B & fp4=A FMUL.X FP1,FP1 ; ok boys, now fp1 = B*B, ready FMOVE.X FP0,FP2 ; hold it, PolyEval needs its arg in fp0 FMOVE.X FP1,FP0 ; fp0=fp1= B*B, fp2 = T, fp3 = B & fp4 = A LEA ATANP,A0 ; A0 = coeff table ptr for PolyEval BSR PolyEval ; fp1 = PolyEval(P, B*B) FMOVE.X FP1,FP5 ; save P(arg) LEA ATANQ,A0 ; A0 = coeff table ptr for PolyEval BSR PolyEval ; fp1 = PolyEval(Q, B*B) FDIV.X FP1,FP5 ; form fp5 = P(B*B)/Q(B*B) FMUL.X FP3,FP5 ; form fp5 = B * P(B*B)/Q(B*B) FADD.X FPKX2FX2,FP5 ; fp5 = B * P(B*B)/Q(B*B) + x2fx2 FADD.X FP4,FP5 ; fp5 = A + ( B * P(B*B)/Q(B*B) + x2fx2) FSUB.X FP2,FP5 ; fp5 = T-(A+( B*P(B*B)/Q(B*B) + x2fx2)) FMOVE.X FP5,FP0 ; get it ready to fly out ; FMUL.B #-1,FP0 ; restore sign due to algorithm (strange!) DELETED <4/23/92, JPO> FNEG.X FP0,FP0 ; restore sign due to algorithm <4/23/92, JPO> FMOVEM (SP)+,FP4/FP5 ; restore what you destroyed BRA.W GetOut ; get ready to fly out ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; P R O B L E M C A S E S ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Problem cases: NaN, -°, +° and zero. ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; StinkyBoys BMI.B ArctgZero ; T = 0 ? ( N bit on the 030 is set ) BTST #29,D0 ; is T a NaN? ( bit position 29 ) ; BEQ.B ArctgInf ; all right, T = +° detected! DELETED <4/23/92, JPO> BNE.B ArctgZero ; yes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; All right boys & girls, at this time it is apparent that the good old arg ;; ;; is an infinity or a NaN. ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;ArctgZero MOVEM.L (SP)+,A1/A2/A3 ; restore sign, boolean & flag registers DELETED <4/23/92, JPO> ; MOVEQ #0,D0 ; signal no exceptions DELETED <4/23/92, JPO> ; JMP (A4) ; return the input NaN or zero DELETED <4/23/92, JPO> ;ArctgInf: ; label DELETED <4/23/92, JPO> FMOVE.X FPKPI2,FP0 ; return an unsigned ¹/2 ; MOVE.W A1,D1 ; activate the zero flag DELETED <4/23/92, JPO> ; BEQ.B @75 ; is T negative? ( the Z bar bit set ) DELETED <4/23/92, JPO> ; FMUL.B #-1,FP0 ; T was negative then return -¹/2 DELETED <4/23/92, JPO> ;@75 MOVEM.L (SP)+,A1/A2/A3 ; restore sign, boolean & flag registers DELETED <4/23/92, JPO> TST.W (A0) ; return -¹/2 for -INF input <4/23/92, JPO> BPL.B ArctgZero ; return +¹/2 <4/23/92, JPO> FNEG.X FP0,FP0 ; return -¹/2 <4/23/92, JPO> ArctgZero: ; label ADDED <4/23/92, JPO> MOVEQ #0,D0 ; signal no exceptions JMP (A4) ; return either ¹/2 or -¹/2