From a108df45c1293bdc68a509ade8e297348e4aa85a Mon Sep 17 00:00:00 2001
From: Vince Weaver <vince@deater.net>
Date: Thu, 7 Sep 2023 00:21:33 -0400
Subject: [PATCH] fac: optimize plasma tables

---
 fac/Makefile        |  22 +++-
 fac/hello.bas       |   2 +-
 fac/plasma_opt.s    | 219 ++++++++++++++++++++++++++++++++++
 fac/plasma_tables.s | 283 ++++++++++++++++++++++++++++++++++++++++++++
 fac/sin3.s          |   1 +
 5 files changed, 524 insertions(+), 3 deletions(-)
 create mode 100644 fac/plasma_opt.s
 create mode 100644 fac/plasma_tables.s

diff --git a/fac/Makefile b/fac/Makefile
index 94d2c937..fee1a2b0 100644
--- a/fac/Makefile
+++ b/fac/Makefile
@@ -7,10 +7,12 @@ EMPTYDISK = ../empty_disk/empty.dsk
 
 all:	fac.dsk
 
-fac.dsk:	HELLO SIN3
+fac.dsk:	HELLO SIN3 PLASMA_TABLES PLASMA_OPT
 	cp $(EMPTYDISK) fac.dsk
 	$(DOS33) -y fac.dsk SAVE A HELLO
 	$(DOS33) -y fac.dsk BSAVE -a 0xC00 SIN3
+	$(DOS33) -y fac.dsk BSAVE -a 0xC00 PLASMA_TABLES
+	$(DOS33) -y fac.dsk BSAVE -a 0xC00 PLASMA_OPT
 
 
 ###
@@ -28,5 +30,21 @@ sin3.o:	sin3.s
 
 ###
 
+PLASMA_TABLES:	plasma_tables.o
+	ld65 -o PLASMA_TABLES plasma_tables.o -C $(LINKERSCRIPTS)/apple2_c00.inc
+
+plasma_tables.o:	plasma_tables.s
+	ca65 -o plasma_tables.o plasma_tables.s -l plasma_tables.lst
+
+###
+
+PLASMA_OPT:	plasma_opt.o
+	ld65 -o PLASMA_OPT plasma_opt.o -C $(LINKERSCRIPTS)/apple2_c00.inc
+
+plasma_opt.o:	plasma_opt.s
+	ca65 -o plasma_opt.o plasma_opt.s -l plasma_opt.lst
+
+###
+
 clean:	
-	rm -f *~ *.o *.lst SIN3 HELLO
+	rm -f *~ *.o *.lst SIN3 HELLO PLASMA_TABLES PLASMA_OPT
diff --git a/fac/hello.bas b/fac/hello.bas
index fd273ced..ec611394 100644
--- a/fac/hello.bas
+++ b/fac/hello.bas
@@ -1,2 +1,2 @@
 5 HOME
-10 PRINT CHR$(4)"BRUN SIN3"
+10 PRINT CHR$(4)"BRUN PLASMA_OPT"
diff --git a/fac/plasma_opt.s b/fac/plasma_opt.s
new file mode 100644
index 00000000..526cba04
--- /dev/null
+++ b/fac/plasma_opt.s
@@ -0,0 +1,219 @@
+; code to use the FAC (floating point accumulator)
+; to generate plasmagoria sine tables
+
+; 232 bytes = initial implementation
+; 218 bytes = increment high byte of destination instead of loading
+; 208 bytes = modify 1->4 on the fly
+; 205 bytes = make page increment common code
+; 198 bytes = convert thirty-two to twenty-four on fly
+; 188 bytes = convert forty-seven to thirty-eight with one byte
+
+qint	=	$EBF2		; convert FAC to 32-bit int?
+fadd	=	$E7BE		; FAC = (Y:A)+FAC
+movmf	=	$EB2B		; move fac to mem: round FAC and store at Y:X
+fmult	=	$E97F		; FAC = (Y:A) * FAC
+float	=	$EB93		; signed value in A to FAC
+sin	=	$EFF1
+
+ARG = $A5	; A5-AA
+FAC = $9D	; 9D-A2
+
+; code uses: 5E/5F "index" in load arg from Y:A
+;	uses ARG (A5-AA) for argument
+;	uses FAC (9D-A2)
+
+
+OURX	=	$FF
+
+sin1	=	$2000
+sin2	=	$2100
+sin3	=	$2200
+save	=	$2300
+
+HGR	=	$F3E2
+FULLGR	=	$C052
+
+add_debut:
+	jsr	HGR			; call HGR ($F3E2)
+	bit	FULLGR			; access soft switch FULLGR ($C052)
+
+	;====================================================
+	;	sin1[i]=round(47.0+
+	;		32.0*sin(i*(PI*2.0/256.0))+
+	;		16.0*sin(2.0*i*(PI*2.0/256.0)));
+
+	; already set up for this one (operands as assembled)
+
+	jsr	make_sin_table
+
+	;===================================================
+	;	sin2[i]=round(47.0+
+	;		32.0*sin(4.0*i*(PI*2.0/256.0))+
+	;		16.0*sin(3.0*i*(PI*2.0/256.0)));
+
+	; 47 is same, 32 is same, 16 is same
+
+	; convert one to four
+	lda	#$7d		; only the exponent byte differs ($7b -> $7d)
+	sta	one_input
+
+	; load 3 instead of 2
+	lda	#<three_input
+	sta	sin_table_input3_smc+1
+	lda	#>three_input
+	sta	sin_table_input4_smc+1
+
+	jsr	make_sin_table
+
+	;======================================================
+	;	sin3[i]=round(38.0+
+	;		24.0*sin(3.0*i*(PI*2.0/256.0))+
+	;		16.0*sin(8.0*i*(PI*2.0/256.0)));
+
+	; convert 47.5 to 38.5 (offset plus rounding bias, see forty_seven)
+	lda	#$1a		; only the first mantissa byte differs ($3e -> $1a)
+	sta	forty_seven+1
+
+	; (the offset constant is edited in place, so the operands at
+	;  sin_table_add_smc1/2 need no patching.  It carries +0.5
+	;  because qint truncates: floor(x+0.5) yields the round()
+	;  that the formulas above call for.)
+
+	; convert 32 to 24
+	dec	thirty_two	; exponent $86 -> $85
+	lda	#$40		; mantissa 1.0 -> 1.1 (binary), 16*1.5 = 24
+	sta	thirty_two+1
+
+	; load 3 input
+	lda	#<three_input
+	sta	sin_table_input1_smc+1
+	lda	#>three_input
+	sta	sin_table_input2_smc+1
+
+	; load 8 input
+	lda	#<eight_input
+	sta	sin_table_input3_smc+1
+	lda	#>eight_input
+	sta	sin_table_input4_smc+1
+
+	jsr	make_sin_table
+
+end:
+	jmp	end			; spin forever once the tables exist
+
+
+	;===============================
+	; make_sin_table: fill the 256-byte page at sin_table_dest_smc
+	; with offset + scale*sin(a*i) + 16*sin(b*i), then advance the
+	; destination page so the next call fills the following table
+	;===============================
+
+make_sin_table:
+
+	lda	#0
+	sta	OURX
+
+sin_loop:
+	; float treats A as signed: i>=128 enters as i-256, same sines
+	lda	OURX
+	jsr	float			; FAC = float(OURX)
+
+sin_table_input1_smc:
+	lda	#<one_input
+sin_table_input2_smc:
+	ldy	#>one_input
+	jsr	fmult			; FAC=FAC*(constant from RAM)
+
+	jsr	sin			; FAC=sin(FAC)
+
+	; scale factor: thirty_two is rewritten in place (to 24) for
+	; sin3, so these two operands never need patching
+	lda	#<thirty_two
+	ldy	#>thirty_two
+	jsr	fmult			; FAC=constant*FAC
+
+	ldx	#<save
+	ldy	#>save
+	jsr	movmf			; save FAC to mem
+
+	lda	OURX
+	jsr	float			; FAC = float(OURX)	(again)
+
+sin_table_input3_smc:
+	lda	#<two_input
+sin_table_input4_smc:
+	ldy	#>two_input
+	jsr	fmult			; FAC=FAC*(constant from RAM)
+
+	jsr	sin			; FAC=sin(FAC)
+
+	lda	#<sixteen
+	ldy	#>sixteen
+	jsr	fmult			; FAC=constant*FAC
+
+	; add first sine
+	lda	#<save
+	ldy	#>save
+	jsr	fadd			; FAC=FAC+(previous result)
+
+	; add constant (offset + 0.5 rounding bias)
+sin_table_add_smc1:
+	lda	#<forty_seven
+sin_table_add_smc2:
+	ldy	#>forty_seven
+	jsr	fadd			; FAC=FAC+constant
+
+	jsr	qint			; truncate to integer (0.5 already added)
+
+	lda	FAC+4			; get bottom byte
+
+	ldx	OURX
+
+sin_table_dest_smc:
+	sta	sin1,X			; save to memory
+
+	inc	OURX			; move to next
+	bne	sin_loop		; loop until done
+
+	inc	sin_table_dest_smc+2	; point to next location
+
+	rts
+
+
+sixteen:
+	.byte	$85,$00,$00,$00,$00
+
+;twenty_four:		(what thirty_two is rewritten to for sin3)
+;	.byte	$85,$40,$00,$00,$00
+
+thirty_two:
+	.byte	$86,$00,$00,$00,$00
+
+;thirty_eight plus 0.5:	(what forty_seven is rewritten to for sin3)
+;	.byte	$86,$1A,$00,$00,$00
+	; 32 * 1.001101 (binary) = 32*(1+1/8+1/16+1/64) = 38.5
+
+forty_seven:		; holds 47.5 = offset 47 + 0.5 rounding bias
+	.byte	$86,$3E,$00,$00,$00
+	; 32 * 1.011111 (binary) = 32*(1+1/4+1/8+1/16+1/32+1/64) = 47.5
+
+one_input:		; exponent is rewritten to $7d (4x) for sin2
+	; 1*2*pi/256 = .0245436926
+	.byte $7b,$49,$0F,$da,$a2
+
+two_input:
+	; 2*2*pi/256 = .0490873852
+	.byte $7c,$49,$0F,$da,$a2
+
+three_input:
+	; 3*2*pi/256 = .0736310778
+	.byte $7d,$16,$cb,$e3,$f9
+
+;four_input:		(what one_input is rewritten to for sin2)
+;	; 4*2*pi/256 = .0981747704
+;	.byte $7d,$49,$0F,$da,$a2
+
+eight_input:
+	; 8*2*pi/256 = .196349541
+	.byte $7E,$49,$0F,$da,$a2
+
diff --git a/fac/plasma_tables.s b/fac/plasma_tables.s
new file mode 100644
index 00000000..1c597905
--- /dev/null
+++ b/fac/plasma_tables.s
@@ -0,0 +1,283 @@
+; code to use the FAC (floating point accumulator)
+
+chkcom	=	$DEBE		; check for comma
+ptrget	=	$DFE3
+frmnum	=	$DD67		; evaluate expression, make sure is number
+FACEXP	=	$9D
+movmf	=	$EB2B		; move fac to mem: round FAC and store at Y:X
+movfm	=	$EAF9		; move mem to fac: unpack (Y:A) to FAC
+conupk	=	$E9E3
+fadd	=	$E7BE		; FAC = (Y:A)+FAC
+faddt	=	$E7C1		; FAC = ARG + FAC
+fadd_half =	$E7A0		; add 0.5 to FAC
+fsub	=	$E7A7		; FAC = (Y:A)-FAC
+fsubt	=	$E7AA		; FAC = ARG - FAC
+fzero	=	$E84E		; FAC = 0 (sets fac.sign and fac.exp)
+fcomplement =	$E89E		; twos complement of FAC
+fmult	=	$E97F		; FAC = (Y:A) * FAC
+fmultt	=	$E982		; FAC = ARG*FAC (!!! Z must be properly set)
+load_arg=	$E9E3		; unpack (Y:A) into ARG
+mul10	=	$EA39		; FAC=FAC*10
+div10	=	$EA55		; FAC=FAC/10
+div	=	$EA5E		; FAC=ARG/(Y:A)
+fdiv	=	$EA66		; FAC=(Y:A)/FAC
+fdivt	=	$EA69		; FAC=ARG/FAC (!!! Z must be properly set)
+; various round and store fac
+fac2arg	=	$EB63		; ARG = FAC
+sign	=	$EB82		; SGN(FAC) 1/0/-1
+float	=	$EB93		; signed value in A to FAC
+fcomp	=	$EBB2		; compare
+qint	=	$EBF2		; convert FAC to 32-bit int?
+int	=	$EC23		; INT(FAC) (clear fractional part)
+addafac	=	$ECD5		; add A to FAC (signed?)
+printfac=	$ED2E
+sqr	=	$EE8D		; FAC=sqrt(FAC) [actually does FAC^0.5]
+fpwrt	=	$EE97		; FAC=ARG^FAC
+negop	=	$EED0		; FAC=-FAC
+exp	=	$EF09		; FAC = e^FAC
+; polynomial?
+rnd	=	$EFAE		; FAC = RND() random number
+cos	=	$EFEA
+sin	=	$EFF1
+tan	=	$F03A
+atn	=	$F09E
+
+; constants
+const_one	=	$E926	; one
+; poly coefficients?
+; sqrt(.5)
+; sqrt(2)
+; 0.5
+; -0.5
+; log(2)
+const_10=	$EA50	; 10
+; billion
+; 999,999,999
+; 99,999,999.9
+; log(e) to base(2)
+; polynomials for log
+; one again
+; table of 32-bit powers of 10 +/- for some reason
+; pi/2
+pi_doub	=	$F06E	; 2*pi
+; 0.25 (quarter)
+
+
+ARG = $A5
+FAC = $9D
+
+; code uses: 5E/5F "index" in load arg from Y:A
+;	uses ARG (A5-AA) for argument
+;	uses FAC (9D-A2)
+
+
+; in memory, 5 bytes "packed"
+;	exponent, mantissa MSB, mantissa, mantissa, mantissa l.s.b
+;	top bit of exponent is sign (0 negative)
+;	so $84/$20/$00/$00/$00
+;	$84 = positive $4, subtract 1, so 2^3 = 8
+;	mantissa = 1.XX XX XX XX, in this case 1. (Sign)010 0000 = 1.25
+;	1.25*8 = 10
+
+; FAC also has sign byte at $A2
+
+
+; to make constants
+;	NEW
+;	A=10
+;	804L, should be 41 00 - 84 20 00 00 00
+;                        A    - 5-bytes for 10
+
+OURX	=	$FF
+
+sin1	=	$2000
+sin2	=	$2100
+sin3	=	$2200
+save	=	$2300
+
+HGR	=	$F3E2
+FULLGR	=	$C052
+
+add_debut:
+	jsr	HGR			; call HGR ($F3E2)
+	bit	FULLGR			; access soft switch FULLGR ($C052)
+
+	;	sin1[i]=round(47.0+
+	;		32.0*sin(i*(PI*2.0/256.0))+
+	;		16.0*sin(2.0*i*(PI*2.0/256.0)));
+
+	; already set up for this one (make_sin_table's operands as assembled)
+
+	jsr	make_sin_table
+
+	;	sin2[i]=round(47.0+
+	;		32.0*sin(4.0*i*(PI*2.0/256.0))+
+	;		16.0*sin(3.0*i*(PI*2.0/256.0)));
+
+	lda	#<sin2			; retarget the table store to sin2
+	sta	sin_table_dest_smc+1
+	lda	#>sin2
+	sta	sin_table_dest_smc+2
+
+	; 47 is same
+	; 32 is same
+	; 16 is same
+
+	lda	#<four_input		; first sine now multiplies i by four_input
+	sta	sin_table_input1_smc+1
+	lda	#>four_input
+	sta	sin_table_input2_smc+1
+
+	lda	#<three_input		; second sine now multiplies i by three_input
+	sta	sin_table_input3_smc+1
+	lda	#>three_input
+	sta	sin_table_input4_smc+1
+
+	jsr	make_sin_table
+
+	;	sin3[i]=round(38.0+
+        ;		24.0*sin(3.0*i*(PI*2.0/256.0))+
+        ;		16.0*sin(8.0*i*(PI*2.0/256.0)));
+
+	lda	#<sin3			; retarget the table store to sin3
+	sta	sin_table_dest_smc+1
+	lda	#>sin3
+	sta	sin_table_dest_smc+2
+
+	lda	#<thirty_eight		; additive constant: 47 -> 38
+	sta	sin_table_add_smc1+1
+	lda	#>thirty_eight
+	sta	sin_table_add_smc2+1
+
+	lda	#<twenty_four		; first sine's scale: 32 -> 24
+	sta	sin_table_scale1_smc+1
+	lda	#>twenty_four
+	sta	sin_table_scale2_smc+1
+
+	lda	#<three_input		; first sine now multiplies i by three_input
+	sta	sin_table_input1_smc+1
+	lda	#>three_input
+	sta	sin_table_input2_smc+1
+
+	lda	#<eight_input		; second sine now multiplies i by eight_input
+	sta	sin_table_input3_smc+1
+	lda	#>eight_input
+	sta	sin_table_input4_smc+1
+
+
+
+	jsr	make_sin_table
+
+end:
+	jmp	end			; spin forever once the tables exist
+
+
+	;===============================
+	; make_sin_table: store 256 bytes at the sin_table_dest_smc address:
+	;   table[i] = round(offset + scale*sin(a*i) + 16*sin(b*i))
+	; a, b, scale, offset and destination are patched in by the caller
+	;===============================
+
+make_sin_table:
+
+	lda	#0
+	sta	OURX
+
+sin_loop:
+
+	lda	OURX
+	jsr	float		; FAC = X (float takes A as a signed value)
+
+sin_table_input1_smc:
+	lda	#<one_input
+sin_table_input2_smc:
+	ldy	#>one_input
+	jsr	fmult			; FAC = FAC * first input constant
+	jsr	sin			; FAC = sin(FAC)
+sin_table_scale1_smc:
+	lda	#<thirty_two
+sin_table_scale2_smc:
+	ldy	#>thirty_two
+	jsr	fmult			; FAC = FAC * scale constant
+
+	ldx	#<save
+	ldy	#>save
+	jsr	movmf			; save FAC to mem
+
+	lda	OURX
+	jsr	float		; FAC = X
+sin_table_input3_smc:
+	lda	#<two_input
+sin_table_input4_smc:
+	ldy	#>two_input
+	jsr	fmult			; FAC = FAC * second input constant
+	jsr	sin			; FAC = sin(FAC)
+
+	lda	#<sixteen
+	ldy	#>sixteen
+	jsr	fmult			; FAC = FAC * 16
+
+	; add first sine
+	lda	#<save
+	ldy	#>save
+	jsr	fadd
+
+	; add constant offset (47 as assembled; caller patches in 38 for sin3)
+sin_table_add_smc1:
+	lda	#<forty_seven
+sin_table_add_smc2:
+	ldy	#>forty_seven
+	jsr	fadd
+	jsr	fadd_half		; +0.5 so the truncating qint rounds to nearest
+	jsr	qint			; FAC -> integer
+
+	lda	FAC+4			; low byte of the integer result
+
+	ldx	OURX
+
+sin_table_dest_smc:
+	sta	sin1,X			; address patched by caller (sin1/sin2/sin3)
+
+	inc	OURX
+	bne	sin_loop		; 256 iterations: OURX wraps back to 0
+
+	rts
+
+
+sixteen:
+	.byte	$85,$00,$00,$00,$00
+
+twenty_four:
+	.byte	$85,$40,$00,$00,$00
+
+thirty_two:
+	.byte	$86,$00,$00,$00,$00
+
+thirty_eight:
+	.byte	$86,$18,$00,$00,$00
+	; 32 * 1.0011 (binary) = 32*(1+1/8+1/16) = 38
+
+forty_seven:
+	.byte	$86,$3C,$00,$00,$00
+	; 32 * 1.01111 (binary) = 32*(1+1/4+1/8+1/16+1/32) = 47
+
+one_input:
+	; 1*2*pi/256 = .0245436926
+	.byte $7b,$49,$0F,$da,$a2
+
+two_input:
+	; 2*2*pi/256 = .0490873852
+	.byte $7c,$49,$0F,$da,$a2
+
+three_input:
+	; 3*2*pi/256 = .0736310778
+	.byte $7d,$16,$cb,$e3,$f9
+
+four_input:
+	; 4*2*pi/256 = .0981747704
+	.byte $7d,$49,$0F,$da,$a2
+
+eight_input:
+	; 8*2*pi/256 = .196349541
+	.byte $7E,$49,$0F,$da,$a2
+
diff --git a/fac/sin3.s b/fac/sin3.s
index 7568fc34..d124b971 100644
--- a/fac/sin3.s
+++ b/fac/sin3.s
@@ -125,6 +125,7 @@ sin3_loop:
 	ldy	#>eight_input
 	jsr	fmult
 	jsr	sin
+
 	lda	#<sixteen
 	ldy	#>sixteen
 	jsr	fmult