From a108df45c1293bdc68a509ade8e297348e4aa85a Mon Sep 17 00:00:00 2001
From: Vince Weaver
Date: Thu, 7 Sep 2023 00:21:33 -0400
Subject: [PATCH] fac: optimize plasma tables

---
 fac/Makefile        |  22 +++-
 fac/hello.bas       |   2 +-
 fac/plasma_opt.s    | 219 ++++++++++++++++++++++++++++++++++
 fac/plasma_tables.s | 283 ++++++++++++++++++++++++++++++++++++++++++++
 fac/sin3.s          |   1 +
 5 files changed, 524 insertions(+), 3 deletions(-)
 create mode 100644 fac/plasma_opt.s
 create mode 100644 fac/plasma_tables.s

diff --git a/fac/Makefile b/fac/Makefile
index 94d2c937..fee1a2b0 100644
--- a/fac/Makefile
+++ b/fac/Makefile
@@ -7,10 +7,12 @@ EMPTYDISK = ../empty_disk/empty.dsk
 
 all: fac.dsk
 
-fac.dsk: HELLO SIN3
+fac.dsk: HELLO SIN3 PLASMA_TABLES PLASMA_OPT
 	cp $(EMPTYDISK) fac.dsk
 	$(DOS33) -y fac.dsk SAVE A HELLO
 	$(DOS33) -y fac.dsk BSAVE -a 0xC00 SIN3
+	$(DOS33) -y fac.dsk BSAVE -a 0xC00 PLASMA_TABLES
+	$(DOS33) -y fac.dsk BSAVE -a 0xC00 PLASMA_OPT
 
 ###
 
@@ -28,5 +30,21 @@ sin3.o: sin3.s
 
 ###
 
+PLASMA_TABLES: plasma_tables.o
+	ld65 -o PLASMA_TABLES plasma_tables.o -C $(LINKERSCRIPTS)/apple2_c00.inc
+
+plasma_tables.o: plasma_tables.s
+	ca65 -o plasma_tables.o plasma_tables.s -l plasma_tables.lst
+
+###
+
+PLASMA_OPT: plasma_opt.o
+	ld65 -o PLASMA_OPT plasma_opt.o -C $(LINKERSCRIPTS)/apple2_c00.inc
+
+plasma_opt.o: plasma_opt.s
+	ca65 -o plasma_opt.o plasma_opt.s -l plasma_opt.lst
+
+###
+
 clean:
-	rm -f *~ *.o *.lst SIN3 HELLO
+	rm -f *~ *.o *.lst SIN3 HELLO PLASMA_TABLES PLASMA_OPT
diff --git a/fac/hello.bas b/fac/hello.bas
index fd273ced..ec611394 100644
--- a/fac/hello.bas
+++ b/fac/hello.bas
@@ -1,2 +1,2 @@
 5 HOME
-10 PRINT CHR$(4)"BRUN SIN3"
+10 PRINT CHR$(4)"BRUN PLASMA_OPT"
diff --git a/fac/plasma_opt.s b/fac/plasma_opt.s
new file mode 100644
index 00000000..526cba04
--- /dev/null
+++ b/fac/plasma_opt.s
@@ -0,0 +1,219 @@
+; code to use the FAC (floating point accumulator)
+; to generate plasmagoria sine tables
+
+; 232 bytes = initial implementation
+; 218 bytes = increment high byte of destination instead of loading
+; 208 bytes = modify 1->4 on the fly
+; 205 bytes = make page increment common code
+; 198 bytes = convert thirty-two to twenty-four on fly
+; 188 bytes = convert forty-seven to thirty-eight with one byte
+
+qint	= $EBF2	; convert FAC to 32-bit int?
+fadd	= $E7BE	; FAC = (Y:A)+FAC
+movmf	= $EB2B	; move fac to mem: round FAC and store at Y:X
+fmult	= $E97F	; FAC = (Y:A) * FAC
+float	= $EB93	; signed value in A to FAC
+sin	= $EFF1
+
+ARG	= $A5	; A5-AA
+FAC	= $9D	; 9D-A2
+
+; code uses: 5E/5F "index" in load arg from Y:A
+; uses ARG (A5-AA) for argument
+; uses FAC (9D-A2)
+
+
+OURX	= $FF
+
+sin1	= $2000
+sin2	= $2100
+sin3	= $2200
+save	= $2300
+
+HGR	= $F3E2
+FULLGR	= $C052
+
+add_debut:
+	jsr	HGR
+	bit	FULLGR
+
+	;====================================================
+	; sin1[i]=round(47.0+
+	;		32.0*sin(i*(PI*2.0/256.0))+
+	;		16.0*sin(2.0*i*(PI*2.0/256.0)));
+
+	; already set up for this one
+
+	jsr	make_sin_table
+
+	;===================================================
+	; sin2[i]=round(47.0+
+	;		32.0*sin(4.0*i*(PI*2.0/256.0))+
+	;		16.0*sin(3.0*i*(PI*2.0/256.0)));
+
+	; 47 is same, 32 is same, 16 is same
+
+	; convert one to four
+	lda	#$7d			; only one byte different
+	sta	one_input		; exponent $7b -> $7d multiplies by 4
+
+	; load 3 instead of 2
+	lda	#<three_input
+	sta	sin_table_input3_smc+1	; patch operand of lda #<
+	lda	#>three_input
+	sta	sin_table_input4_smc+1	; patch operand of ldy #>
+
+	jsr	make_sin_table
+
+	;======================================================
+	; sin3[i]=round(38.0+
+	;		24.0*sin(3.0*i*(PI*2.0/256.0))+
+	;		16.0*sin(8.0*i*(PI*2.0/256.0)));
+
+	; convert 47 to 38
+	lda	#$18
+	sta	forty_seven+1		; $86,$3C -> $86,$18
+
+;	lda	#<thirty_eight
+;	sta	sin_table_add_smc1+1
+;	lda	#>thirty_eight
+;	sta	sin_table_add_smc2+1
+
+	; convert 32 to 24
+	dec	thirty_two		; exponent $86 -> $85
+	lda	#$40
+	sta	thirty_two+1		; mantissa 1.0 -> 1.5
+
+	; load 3 input
+	lda	#<three_input
+	sta	sin_table_input1_smc+1
+	lda	#>three_input
+	sta	sin_table_input2_smc+1
+
+	; load 8 input
+	lda	#<eight_input
+	sta	sin_table_input3_smc+1
+	lda	#>eight_input
+	sta	sin_table_input4_smc+1
+
+	jsr	make_sin_table
+
+end:
+	jmp	end
+
+
+	;===============================
+	;===============================
+	;===============================
+	;===============================
+	;===============================
+
+make_sin_table:
+
+	lda	#0
+	sta	OURX
+
+sin_loop:
+
+	lda	OURX
+	jsr	float		; FAC = float(OURX), A taken as signed
+
+sin_table_input1_smc:
+	lda	#<one_input
+sin_table_input2_smc:
+	ldy	#>one_input
+	jsr	fmult		; FAC=FAC*(constant from RAM)
+
+	jsr	sin		; FAC=sin(FAC)
+
+;sin_table_scale1_smc:
+	lda	#<thirty_two
+;sin_table_scale2_smc:
+	ldy	#>thirty_two
+	jsr	fmult		; FAC=constant*FAC
+
+	ldx	#<save
+	ldy	#>save
+	jsr	movmf		; save FAC to mem
+
+	lda	OURX
+	jsr	float		; FAC = float(OURX) (again)
+
+sin_table_input3_smc:
+	lda	#<two_input
+sin_table_input4_smc:
+	ldy	#>two_input
+	jsr	fmult		; FAC=FAC*(constant from RAM)
+
+	jsr	sin		; FAC=sin(FAC)
+
+	lda	#<sixteen
+	ldy	#>sixteen
+	jsr	fmult		; FAC=constant*FAC
+
+	; add first sine
+	lda	#<save
+	ldy	#>save
+	jsr	fadd		; FAC=FAC+(previous result)
+
+	; add constant
+sin_table_add_smc1:
+	lda	#<forty_seven
+sin_table_add_smc2:
+	ldy	#>forty_seven
+	jsr	fadd		; FAC=FAC+constant
+
+	jsr	qint		; convert to integer; NOTE(review): may truncate, formula says round()
+
+	lda	FAC+4		; get bottom byte
+
+	ldx	OURX
+
+sin_table_dest_smc:
+	sta	sin1,X		; save to memory
+
+	inc	OURX		; move to next
+	bne	sin_loop	; loop until done
+
+	inc	sin_table_dest_smc+2	; point to next location
+
+	rts
+
+
+sixteen:
+	.byte $85,$00,$00,$00,$00
+
+;twenty_four:
+;	.byte $85,$40,$00,$00,$00
+
+thirty_two:
+	.byte $86,$00,$00,$00,$00
+
+;thirty_eight:
+;	.byte $86,$18,$00,$00,$00
+	; 2^5 = 32, 1.0011 0000 = 1/8+1/16
+
+forty_seven:
+	.byte $86,$3C,$00,$00,$00
+	; 32 * 1.0111 10000 = 1/4+1/8+1/16+1/32
+
+one_input:
+	; 1*2*pi/256 = .0245436926
+	.byte $7b,$49,$0F,$da,$a2
+
+two_input:
+	; 2*2*pi/256 = .0490873852
+	.byte $7c,$49,$0F,$da,$a2
+
+three_input:
+	; 3*2*pi/256 = .0736310778
+	.byte $7d,$16,$cb,$e3,$f9
+
+;four_input:
+;	; 4*2*pi/256 = .0981747704
+;	.byte $7d,$49,$0F,$da,$a2
+
+eight_input:
+	; 8*2*pi/256 = .196349541
+	.byte $7E,$49,$0F,$da,$a2
+
diff --git a/fac/plasma_tables.s b/fac/plasma_tables.s
new file mode 100644
index 00000000..1c597905
--- /dev/null
+++ b/fac/plasma_tables.s
@@ -0,0 +1,283 @@
+; code to use the FAC (floating point accumulator)
+
+chkcom	= $DEBE	; check for comma
+ptrget	= $DFE3
+frmnum	= $DD67	; evaluate expression, make sure is number
+FACEXP	= $9D
+movmf	= $EB2B	; move fac to mem: round FAC and store at Y:X
+movfm	= $EAF9	; move mem to fac: unpack (Y:A) to FAC
+conupk	= $E9E3
+fadd	= $E7BE	; FAC = (Y:A)+FAC
+faddt	= $E7C1	; FAC = ARG + FAC
+fadd_half = $E7A0	; add 0.5 to FAC
+fsub	= $E7A7	; FAC = (Y:A)-FAC
+fsubt	= $E7AA	; FAC = ARG - FAC
+fzero	= $E84E	; FAC = 0 (sets fac.sign and fac.exp)
+fcomplement = $E89E	; twos complement of FAC
+fmult	= $E97F	; FAC = (Y:A) * FAC
+fmultt	= $E982	; FAC = ARG*FAC (!!! Z must be properly set)
+load_arg= $E9E3	; unpack (Y:A) into ARG
+mul10	= $EA39	; FAC=FAC*10
+div10	= $EA55	; FAC=FAC/10
+div	= $EA5E	; FAC=ARG/(Y:A)
+fdiv	= $EA66	; FAC=(Y:A)/FAC
+fdivt	= $EA69	; FAC=ARG/FAC (!!! Z must be properly set)
+; various round and store fac
+fac2arg	= $EB63	; ARG = FAC
+sign	= $EB82	; SGN(FAC) 1/0/-1
+float	= $EB93	; signed value in A to FAC
+fcomp	= $EBB2	; compare
+qint	= $EBF2	; convert FAC to 32-bit int?
+int	= $EC23	; INT(FAC) (clear fractional part)
+addafac	= $ECD5	; add A to FAC (signed?)
+printfac= $ED2E
+sqr	= $EE8D	; FAC=sqrt(FAC) [actually does FAC^0.5]
+fpwrt	= $EE97	; FAC=ARG^FAC
+negop	= $EED0	; FAC=-FAC
+exp	= $EF09	; FAC = e^FAC
+; polynomial?
+rnd	= $EFAE	; FAC = RND() random number
+cos	= $EFEA
+sin	= $EFF1
+tan	= $F03A
+atn	= $F09E
+
+; constants
+const_one = $E926	; one
+; poly coefficients?
+; sqrt(.5)
+; sqrt(2)
+; 0.5
+; -0.5
+; log(2)
+const_10= $EA50	; 10
+; billion
+; 999,999,999
+; 99,999,999.9
+; log(e) to base(2)
+; polynomials for log
+; one again
+; table of 32-bit powers of 10 +/- for some reason
+; pi/2
+pi_doub	= $F06E	; 2*pi
+; 0.25 (quarter)
+
+
+ARG	= $A5
+FAC	= $9D
+
+; code uses: 5E/5F "index" in load arg from Y:A
+; uses ARG (A5-AA) for argument
+; uses FAC (9D-A2)
+
+
+; in memory, 5 bytes "packed"
+;	exponent, mantissa MSB, mantissa, mantissa, mantissa l.s.b
+;	top bit of exponent is sign (0 negative)
+;	so $84/$20/$00/$00/$00
+;	$84 = positive $4, subtract 1, so 2^3 = 8
+;	mantissa = 1.XX XX XX XX, in this case 1. (Sign)010 0000 = 1.25
+;	1.25*8 = 10
+
+; FAC also has sign byte at $A2
+
+
+; to make constants
+;	NEW
+;	A=10
+;	804L, should be 41 00 - 84 20 00 00 00
+;	A - 5-bytes for 10
+
+OURX	= $FF
+
+sin1	= $2000
+sin2	= $2100
+sin3	= $2200
+save	= $2300
+
+HGR	= $F3E2
+FULLGR	= $C052
+
+add_debut:
+	jsr	HGR
+	bit	FULLGR
+
+	; sin1[i]=round(47.0+
+	;		32.0*sin(i*(PI*2.0/256.0))+
+	;		16.0*sin(2.0*i*(PI*2.0/256.0)));
+
+	; already set up for this one
+
+	jsr	make_sin_table
+
+	; sin2[i]=round(47.0+
+	;		32.0*sin(4.0*i*(PI*2.0/256.0))+
+	;		16.0*sin(3.0*i*(PI*2.0/256.0)));
+
+	lda	#<sin2
+	sta	sin_table_dest_smc+1	; patch destination low byte
+	lda	#>sin2
+	sta	sin_table_dest_smc+2	; patch destination high byte
+
+	; 47 is same
+	; 32 is same
+	; 16 is same
+
+	lda	#<four_input
+	sta	sin_table_input1_smc+1
+	lda	#>four_input
+	sta	sin_table_input2_smc+1
+
+	lda	#<three_input
+	sta	sin_table_input3_smc+1
+	lda	#>three_input
+	sta	sin_table_input4_smc+1
+
+	jsr	make_sin_table
+
+	; sin3[i]=round(38.0+
+	;		24.0*sin(3.0*i*(PI*2.0/256.0))+
+	;		16.0*sin(8.0*i*(PI*2.0/256.0)));
+
+	lda	#<sin3
+	sta	sin_table_dest_smc+1
+	lda	#>sin3
+	sta	sin_table_dest_smc+2
+
+	lda	#<thirty_eight
+	sta	sin_table_add_smc1+1
+	lda	#>thirty_eight
+	sta	sin_table_add_smc2+1
+
+	lda	#<twenty_four
+	sta	sin_table_scale1_smc+1
+	lda	#>twenty_four
+	sta	sin_table_scale2_smc+1
+
+	lda	#<three_input
+	sta	sin_table_input1_smc+1
+	lda	#>three_input
+	sta	sin_table_input2_smc+1
+
+	lda	#<eight_input
+	sta	sin_table_input3_smc+1
+	lda	#>eight_input
+	sta	sin_table_input4_smc+1
+
+
+
+	jsr	make_sin_table
+
+end:
+	jmp	end
+
+
+	;===============================
+	;===============================
+	;===============================
+	;===============================
+	;===============================
+
+make_sin_table:
+
+	lda	#0
+	sta	OURX
+
+sin_loop:
+
+	lda	OURX
+	jsr	float		; FAC = X
+
+sin_table_input1_smc:
+	lda	#<one_input
+sin_table_input2_smc:
+	ldy	#>one_input
+	jsr	fmult		; FAC = FAC * (first input constant)
+	jsr	sin		; FAC = sin(FAC)
+sin_table_scale1_smc:
+	lda	#<thirty_two
+sin_table_scale2_smc:
+	ldy	#>thirty_two
+	jsr	fmult		; FAC = scale * FAC
+
+	ldx	#<save
+	ldy	#>save
+	jsr	movmf		; save FAC to mem
+
+	lda	OURX
+	jsr	float		; FAC = X
+sin_table_input3_smc:
+	lda	#<two_input
+sin_table_input4_smc:
+	ldy	#>two_input
+	jsr	fmult		; FAC = FAC * (second input constant)
+	jsr	sin		; FAC = sin(FAC)
+
+	lda	#<sixteen
+	ldy	#>sixteen
+	jsr	fmult		; FAC = 16 * FAC
+
+	; add first sine
+	lda	#<save
+	ldy	#>save
+	jsr	fadd		; FAC = FAC + saved first term
+
+	; add constant (forty_seven unless patched to thirty_eight)
+sin_table_add_smc1:
+	lda	#<forty_seven
+sin_table_add_smc2:
+	ldy	#>forty_seven
+	jsr	fadd		; FAC = FAC + constant
+
+	jsr	qint		; to integer; NOTE(review): may truncate, formula says round()
+
+	lda	FAC+4		; low byte of integer result
+
+	ldx	OURX
+
+sin_table_dest_smc:
+	sta	sin1,X		; store table entry (address is patched)
+
+	inc	OURX
+	bne	sin_loop	; 256 entries, until OURX wraps to 0
+
+	rts
+
+
+sixteen:
+	.byte $85,$00,$00,$00,$00
+
+twenty_four:
+	.byte $85,$40,$00,$00,$00
+
+thirty_two:
+	.byte $86,$00,$00,$00,$00
+
+thirty_eight:
+	.byte $86,$18,$00,$00,$00
+	; 2^5 = 32, 1.0011 0000 = 1/8+1/16
+
+forty_seven:
+	.byte $86,$3C,$00,$00,$00
+	; 32 * 1.0111 10000 = 1/4+1/8+1/16+1/32
+
+one_input:
+	; 1*2*pi/256 = .0245436926
+	.byte $7b,$49,$0F,$da,$a2
+
+two_input:
+	; 2*2*pi/256 = .0490873852
+	.byte $7c,$49,$0F,$da,$a2
+
+three_input:
+	; 3*2*pi/256 = .0736310778
+	.byte $7d,$16,$cb,$e3,$f9
+
+four_input:
+	; 4*2*pi/256 = .0981747704
+	.byte $7d,$49,$0F,$da,$a2
+
+eight_input:
+	; 8*2*pi/256 = .196349541
+	.byte $7E,$49,$0F,$da,$a2
+
diff --git a/fac/sin3.s b/fac/sin3.s
index 7568fc34..d124b971 100644
--- a/fac/sin3.s
+++ b/fac/sin3.s
@@ -125,6 +125,7 @@ sin3_loop:
 	ldy	#>eight_input
 	jsr	fmult
 	jsr	sin
+	lda	#<sixteen
 	ldy	#>sixteen
 	jsr	fmult
 