mac-rom/Toolbox/InSANE/HWElemsArcTg.a
Elliot Nunn 4325cdcc78 Bring in CubeE sources
Resource forks are included only for .rsrc files. These are DeRezzed into their data fork. 'ckid' resources, from the Projector VCS, are not included.

The Tools directory, containing mostly junk, is also excluded.
2017-12-26 09:52:23 +08:00

305 lines
14 KiB
Plaintext

;
; File: HWElemsArcTg.a
;
; Contains: HW ATAN() routines
;
; Written by: Apple Numerics Group, DSG
;
; Copyright: © 1985-1993 by Apple Computer, Inc., all rights reserved.
;
; Change History (most recent first):
;
; <SM2> 2/3/93 CSS Update from Horror:
; <H2> 9/29/92 BG Adding Jon Okada's latest fixes.
; <1> 10/24/91 SAM/KSM Rolled in Regatta file.
;
; Terror Change History:
;
; <1> 01/06/91 BG Added to TERROR/BBS for the time.
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ;;
;; File 881ELEMStg.a, ;;
;; Function arctg(x), ;;
;; Implementation of tg for ½SANE. ;;
;; For machines with MC68881/68882 FPU. ;;
;; ;;
;; Copyright Apple Computer, Inc. 1985-7, 1989, 1990, 1992, 1993. ;;
;; ;;
;; Written by Ali Sazegari, started on August 11 1989, ;;
;; Added sections to handle flags correctly, December 14, 1989. ;;
;; Funny behavior on the input -inf: i can't get these flags to print with ;;
;; my fortran test program. ;;
;; A problem was found using the arctg with pascal and c compilers: ;;
;; the instruction MOVEM.W A1/A2,-(SP) caused a bus error. The reason ;;
;; is that on the restore side ( 3 occurences in this code ), MOVEM.W the ;;
;; result is sign extended to 32 bits ( akh! ) therefore garbeling both ;;
;; registers. MOVEM.W replaced by MOVEM.L, May 21, 1990. ;;
;; Grouped the A1-A3 save and restore together, May 22, 1990 ;;
;; ;;
;; N.B. This program is a hog, it uses the following registers: ;;
;; a0, d0, d1, fp0, fp1, fp2, fp3, fp4 & fp5. ;;
;; ;;
;; based on Elems881, package code for Macintosh by J. Coonen, ;;
;; who wrote it from pascal programs by D. Hough, C. McMaster and K. Hanson. ;;
;; ;;
;; Further Modification History: ;;
;; 23 Apr 92 Performance tuning done (better register usage, faster FPU ;;
;; ops used, front-end check for small/huge operands eliminates ;;
;; spurious underflowing. ... JPO ;;
;; ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
RAtan
; MOVEM.L A1/A2/A3,-(SP) ; a1 = sgn(T), where T is the argument DELETED <4/23/92, JPO>
; a2 is a boolean, cleared if argument
; is less than one, set otherwise
; use a3 for denormal flag use
; MOVEA.L A0,A3 ; keep a copy of the pointer to argument DELETED <4/23/92, JPO>
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Classification of the argument with a neat trick. ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
FMOVE.L FPSR,D0 ; what kind of arg? fpsr has the key
MOVEQ #0,D1 ; clr sign/size flag <4/23/92, JPO>
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Here is the strategy ( commonly known as the clay scoop ) by C. R. Lewis ;;
;; Register D0 has N, Z, I, NaN burried in it. Mask off unwanted bits to ;;
;; get them. Then, left shift NZINaN to have Z at MSB position. If D0 = 0 ;;
;; then arg is finite. ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; MOVE.L #ExcMask,D1 ; mask it kid DELETED <4/23/92, JPO>
; AND.L D1,D0 ; we now have the meat DELETED<4/23/92, JPO>
ANDI.L #ExcMask,D0 ; isolate FPCC bits <4/23/92, JPO>
LSL.L #ShiftIt,D0 ; N->C, Z->MSB, I->30th & NaN->29th bit
BNE.W StinkyBoys ; zero, infinite or NaN input <4/23/92, JPO>
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; If arg is negative then set bit 31 of D1 ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; BCS.B @70 ; T < 0 ? DELETED <4/23/92, JPO>
; MOVEA.W #0,A1 ; no T > 0, then clear the flag DELETED <4/23/92, JPO>
; BRA.B @71 ; DELETED <4/23/92, JPO>
;@70 label DELETED <4/23/92, JPO>
; MOVEA.W #1,A1 ; yes T < 0, then set the flag DELETED <4/23/92, JPO>
;@71 ; label DELETED <4/23/92, JPO>
BCC.B @smchk ; input > 0 and so is flag D1 <4/23/92, JPO>
BSET.L #31,D1 ; input < 0 and so is flag D1 <4/23/92, JPO>
@smchk: ; label ADDED <4/23/92, JPO>
BFEXTU (A0){1:15},D0 ; |arg| < 2^(-32)? <4/23/92, JPO>
CMPI.W #$3FDF,D0 ; <4/23/92, JPO>
BGE.B @bigchk ; no, check for big input <4/23/92, JPO>
MOVE.L #SetInexact,D0 ; yes, return arg and INEXACT <4/23/92, JPO>
BFTST (A0){1:16} ; if denormal, signal UNDERFLOW <4/23/92, JPO>
BNE.B @done ; <4/23/92, JPO>
OR.L #SetUFlow,D0 ; <4/23/92, JPO>
@done: ; label ADDED <4/23/92, JPO>
JMP (A4) ; <4/23/92, JPO>
@bigchk: CMPI.W #$407F,D0 ; is unbiased exponent > 128? <4/23/92, JPO>
BLE.B @atando ; no, evaluate atan <4/23/92, JPO>
FMOVE.X FPKPI2,FP0 ; yes, return ¹/2 with arg sign <4/23/92, JPO>
TST.L D1 ; sign of D1 is sign of arg <4/23/92, JPO>
BPL.B @bigdone ; <4/23/92, JPO>
FNEG.X FP0,FP0 ; arg < 0.0 <4/23/92, JPO>
@bigdone: ; label added <4/23/92, JPO>
MOVE.L #SetInexact,D0 ; signal INEXACT <4/23/92, JPO>
JMP (A4)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; If arg is a non zero finite number, great! let's fall through, otherwise ;;
;; take the problem cases. ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; BNE.W StinkyBoys ; take care of the bad guys (Z bar set) DELETED <4/23/92, JPO>
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Go for the main course. Persian style! ;;
;; -- throughout this program, we use 'arctg' & 'atan' interchangeably. ;;
;; ;;
;; About the argument reduction: ATAN(T) is evaluated for 0 <= T <= 1 with ;;
;; either a Short or a Long formula depending on the value of T. ;;
;; ;;
;; Recall the identities: ;;
;; ;;
;; atan(-T) = -atan(T) ;;
;; atan(1/T) = pi/2 - atan(T) ;;
;; ;;
;; If T < 0 then atan(-T) is computed, and the result negated. ;;
;; If |T| > 1 then atan(1/|T|) is computed and the result subtracted from ¹/2;;
;; ;;
;; To compute atan of reduced T use Short or Long formula: ;;
;; ;;
;; T <= ATnCons Å 0.267 then arctg(T) = T - T * P(T*T) / Q(T*T), ;;
;; else arctg(T) = T - (A + (B*P(B*B)/Q(B*B) + x2fx2)) ;;
;; ;;
;; where x2 and x2fx2 are constants, about 0.5 and 0.05 and ;;
;; A = (T - x2)/(1 + (1/(T*x2))), ;;
;; B = (T - x2)/(1 + (T*x2)). ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@atando: ; label ADDED <4/23/92, JPO>
FABS.X FP0,FP0 ; set up for comparison
; FCMP.W #1,FP0 ; DELETED <4/23/92, JPO>
FCMP.S #"$3F800000",FP0 ; T = |arg| <= 1? <4/23/92, JPO>
FBLE.W TisLTone ; is T <= 1 ?
; MOVEA.W #1,A2 ; not, T > 1, set the flag a2 DELETED <4/23/92, JPO>
; FMOVE.W #1,FP3 ; and invert the T so that 1/T < 1 DELETED <4/23/92, JPO>
ADDQ.B #1,D1 ; set inversion flag (D1.B) <4/23/92, JPO>
FMOVE.S #"$3F800000",FP3 ; FP3 <- 1.0 <4/23/92, JPO>
FDIV.X FP0,FP3 ; fp0 = T, fp3 = 1/T
FMOVE.X FP3,FP0 ; fp0 = 1/T, fp3 = 1/T < 1
BRA.B GoDoIt ; now continue with the computation
TisLTone:
; MOVEA.W #0,A2 ; yes, T <= 1, clear the flag a2 DELETED <4/23/92, JPO>
FMOVE.X FP0,FP3 ; duplicate argument: fp0 = fp3 = T <= 1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Select short or long form of the formula based on reduced argument versus ;;
;; ATnCons, about 0.268. ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
GoDoIt FCMP.X FPKATNCONS,FP0 ; start the selection process
FBGT.W LongForm ; do we use the long formula?
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Short formula is selected, form T^2 and evaluate T * ( P(T^2) / Q(T^2) ) ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
FMUL.X FP0,FP0 ; fp0 = T*T, fp3 = T
LEA ATANP,A0 ; A0 = coeff table ptr for PolyEval
BSR PolyEval ; fp1 = PolyEval(P, T*T)
FMOVE.X FP1,FP2 ; save P(arg)
LEA ATANQ,A0 ; A0 = coeff table ptr for PolyEval
BSR PolyEval ; fp1 = PolyEval(Q, T*T)
FDIV.X FP1,FP2 ; form fp2 = P(T*T)/Q(T*T)
FMUL.X FP3,FP2 ; form fp2 = T * P(T*T)/Q(T*T)
FSUB.X FP2,FP3 ; form fp3 = T - T * P(T*T)/Q(T*T)
FMOVE.X FP3,FP0 ; get the result in the right place
GetOut:
; MOVE.W A2,D1 ; activate the boolean flag DELETED <4/23/92, JPO>
TST.B D1 ; test inversion flag <4/23/92, JPO>
BEQ.B @72 ; is T <= 1 ? ( the Z bar bit set )
FMOVE.X FPKPI2,FP1 ; get ¹/2
FSUB.X FP0,FP1 ; arctg(1/T) = ¹/2 - arctg(T)
FMOVE.X FP1,FP0 ; put it in the right place old boy!
@72:
; MOVE.W A1,D1 ; activate the zero flag DELETED <4/23/92, JPO>
; BEQ.B @73 ; is T negative? ( the Z bar bit set ) DELETED <4/23/92, JPO>
; FMUL.B #-1,FP0 ; T was negative then return -arctg(T) DELETED <4/23/92, JPO>
TST.L D1 ; was arg negative? <4/23/92, JPO>
BPL.B @73 ; no <4/23/92, JPO>
FNEG.X FP0,FP0 ; yes, negate result <4/23/92, JPO>
@73:
; BFEXTU (A3) {1:16},D0 ; get the exponent & 1st mantissa bit DELETED <4/23/92, JPO>
; MOVEM.L (SP)+,A1/A2/A3 ; restore sign, boolean & flag registers DELETED <4/23/92, JPO>
; BEQ.B @74 ; if D0 = 0 then current s = denorm DELETED <4/23/92, JPO>
MOVE.L #SetInexact,D0 ; set the inexact flag
; MOVE.B #ClrUflow ,D0 ; and clear the underflow bit DELETED <4/23/92, JPO>
JMP (A4)
;@74 MOVE.L #SetInexact,D0 ; set up the inexact flag DELETED <4/23/92, JPO>
; JMP (A4) ; go to the control routine DELETED <4/23/92, JPO>
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Long formula is selected, proceed with caution, difficult terrain ahead! ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
LongForm FMOVEM FP4/FP5,-(SP) ; ay ya yay! save two more registers!
FMUL.X FPKX2,FP3 ; form fp3 = T * x2
; FMOVE.W #1,FP2 ; get ready for the inverse DELETED <4/23/92, JPO>
FMOVE.S #"$3F800000",FP2 ; FP2 <- 1.0 <4/23/92, JPO>
FDIV.X FP3,FP2 ; fp0 = T, fp2 = 1 / (T*x2), fp3 = T*x2
; FADD.W #1,FP2 ; fp2 = 1 + 1 / (T*x2) DELETED <4/23/92, JPO>
; FADD.W #1,FP3 ; fp3 = 1 + ( T*x2 ) DELETED <4/23/92, JPO>
FADD.S #"$3F800000",FP3 ; FP3 <- 1.0 + (T*x2) <4/23/92, JPO>
FADD.S #"$3F800000",FP2 ; FP2 <- 1.0 + 1.0/(T*x2) <4/23/92, JPO>
FMOVE.X FP0,FP1 ; copy T, fp0 = fp1 = T
FSUB.X FPKX2,FP1 ; form fp1 = T - x2
FMOVE.X FP1,FP4 ; copy for the next move, fp4=fp1= T - x2
FDIV.X FP3,FP1 ; we have fp1 = B = (T-x2)/(1+(1+(T*x2))
FDIV.X FP2,FP4 ; and fp4 = A = (T-x2)/(1 + 1/(T*x2))
FMOVE.X FP1,FP3 ; fp0=T, fp1=B, fp2=junk, fp3=B & fp4=A
FMUL.X FP1,FP1 ; ok boys, now fp1 = B*B, ready
FMOVE.X FP0,FP2 ; hold it, PolyEval needs its arg in fp0
FMOVE.X FP1,FP0 ; fp0=fp1= B*B, fp2 = T, fp3 = B & fp4 = A
LEA ATANP,A0 ; A0 = coeff table ptr for PolyEval
BSR PolyEval ; fp1 = PolyEval(P, B*B)
FMOVE.X FP1,FP5 ; save P(arg)
LEA ATANQ,A0 ; A0 = coeff table ptr for PolyEval
BSR PolyEval ; fp1 = PolyEval(Q, B*B)
FDIV.X FP1,FP5 ; form fp5 = P(B*B)/Q(B*B)
FMUL.X FP3,FP5 ; form fp5 = B * P(B*B)/Q(B*B)
FADD.X FPKX2FX2,FP5 ; fp5 = B * P(B*B)/Q(B*B) + x2fx2
FADD.X FP4,FP5 ; fp5 = A + ( B * P(B*B)/Q(B*B) + x2fx2)
FSUB.X FP2,FP5 ; fp5 = T-(A+( B*P(B*B)/Q(B*B) + x2fx2))
FMOVE.X FP5,FP0 ; get it ready to fly out
; FMUL.B #-1,FP0 ; restore sign due to algorithm (strange!) DELETED <4/23/92, JPO>
FNEG.X FP0,FP0 ; restore sign due to algorithm <4/23/92, JPO>
FMOVEM (SP)+,FP4/FP5 ; restore what you destroyed
BRA.W GetOut ; get ready to fly out
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; P R O B L E M C A S E S ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Problem cases: NaN, -°, +° and zero. ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
StinkyBoys
BMI.B ArctgZero ; T = 0 ? ( N bit on the 030 is set )
BTST #29,D0 ; is T a NaN? ( bit position 29 )
; BEQ.B ArctgInf ; all right, T = +° detected! DELETED <4/23/92, JPO>
BNE.B ArctgZero ; yes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; All right boys & girls, at this time it is apparent that the good old arg ;;
;; is an infinity or a NaN. ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;ArctgZero MOVEM.L (SP)+,A1/A2/A3 ; restore sign, boolean & flag registers DELETED <4/23/92, JPO>
; MOVEQ #0,D0 ; signal no exceptions DELETED <4/23/92, JPO>
; JMP (A4) ; return the input NaN or zero DELETED <4/23/92, JPO>
;ArctgInf: ; label DELETED <4/23/92, JPO>
FMOVE.X FPKPI2,FP0 ; return an unsigned ¹/2
; MOVE.W A1,D1 ; activate the zero flag DELETED <4/23/92, JPO>
; BEQ.B @75 ; is T negative? ( the Z bar bit set ) DELETED <4/23/92, JPO>
; FMUL.B #-1,FP0 ; T was negative then return -¹/2 DELETED <4/23/92, JPO>
;@75 MOVEM.L (SP)+,A1/A2/A3 ; restore sign, boolean & flag registers DELETED <4/23/92, JPO>
TST.W (A0) ; return -¹/2 for -INF input <4/23/92, JPO>
BPL.B ArctgZero ; return +¹/2 <4/23/92, JPO>
FNEG.X FP0,FP0 ; return -¹/2 <4/23/92, JPO>
ArctgZero: ; label ADDED <4/23/92, JPO>
MOVEQ #0,D0 ; signal no exceptions
JMP (A4) ; return either ¹/2 or -¹/2