From 6c134ee06172ac16b5c1f65cb55606241513d9ba Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Sun, 31 Dec 2023 14:02:56 -0500 Subject: [PATCH] bubble: optimizing --- graphics/hgr/bubble/Makefile | 24 +- graphics/hgr/bubble/bubble.s | 187 +++++++++---- graphics/hgr/bubble/bubble_orig.s | 360 +++++++++++++++++++++++++ graphics/hgr/bubble/hgr_clear_screen.s | 91 +++++++ graphics/hgr/bubble/hgr_table.s | 97 +++++++ 5 files changed, 699 insertions(+), 60 deletions(-) create mode 100644 graphics/hgr/bubble/bubble_orig.s create mode 100644 graphics/hgr/bubble/hgr_clear_screen.s create mode 100644 graphics/hgr/bubble/hgr_table.s diff --git a/graphics/hgr/bubble/Makefile b/graphics/hgr/bubble/Makefile index 05188743..588121c6 100644 --- a/graphics/hgr/bubble/Makefile +++ b/graphics/hgr/bubble/Makefile @@ -7,11 +7,13 @@ EMPTY_DISK = ../../../empty_disk all: bubble.dsk -bubble.dsk: HELLO BUBBLE.BAS BUBBLE +bubble.dsk: HELLO BUBBLE.BAS BUBBLE BUBBLE_ORIG DIAMOND cp $(EMPTY_DISK)/empty.dsk bubble.dsk $(DOS33) -y bubble.dsk SAVE A HELLO $(DOS33) -y bubble.dsk SAVE A BUBBLE.BAS $(DOS33) -y bubble.dsk BSAVE -a 0x0C00 BUBBLE + $(DOS33) -y bubble.dsk BSAVE -a 0x0C00 BUBBLE_ORIG + $(DOS33) -y bubble.dsk BSAVE -a 0x0C00 DIAMOND ### @@ -28,6 +30,24 @@ bubble.o: bubble.s ### +BUBBLE_ORIG: bubble_orig.o + ld65 -o BUBBLE_ORIG bubble_orig.o -C $(LINKER_SCRIPTS)/apple2_c00.inc + +bubble_orig.o: bubble_orig.s + ca65 -o bubble_orig.o bubble_orig.s -l bubble_orig.lst + + +### + +DIAMOND: diamond.o + ld65 -o DIAMOND diamond.o -C $(LINKER_SCRIPTS)/apple2_c00.inc + +diamond.o: diamond.s + ca65 -o diamond.o diamond.s -l diamond.lst + + +### + BUBBLE.BAS: bubble.bas $(TOKENIZE) < bubble.bas > BUBBLE.BAS @@ -35,4 +55,4 @@ BUBBLE.BAS: bubble.bas ### clean: - rm -f *~ *.o *.lst BUBBLE.BAS HELLO BUBBLE + rm -f *~ *.o *.lst BUBBLE.BAS HELLO BUBBLE DIAMOND BUBBLE_ORIG diff --git a/graphics/hgr/bubble/bubble.s b/graphics/hgr/bubble/bubble.s index ba2eab2c..338130c3 100644 --- 
a/graphics/hgr/bubble/bubble.s +++ b/graphics/hgr/bubble/bubble.s @@ -1,12 +1,25 @@ ; bubble universe -- Apple II Hires +; original = 612 bytes +; clear screen: +; bkgnd0 = $44198 = 278936 cycles = max ~4fps +; new: $A616 = 42518 = max ~22fps +; hplot +; hplot0 = ($14E-$15C) $14E = 334 * 1024 = 342016 = max ~3fps +; lookup = 46 * 1024 = 47104 = max ~21fps + + +; after fast graphics +; D7E77 = 884343 = 1.1fps +; DD06E = ?? (made J countdown, why longer?) +; DB584 = destructive U when plotting ; soft-switches KEYPRESS = $C000 KEYRESET = $C010 -PAGE1 = $C054 -PAGE2 = $C055 +PAGE1 = $C054 +PAGE2 = $C055 ; ROM routines @@ -21,6 +34,10 @@ WAIT = $FCA8 ; delay 1/2(26+27A+5A^2) us ; zero page +GBASL = $26 +GBASH = $27 + + HPLOTXL = $90 HPLOTXH = $91 HPLOTYL = $92 @@ -57,10 +74,18 @@ NUM = 32 bubble: + ;======================== + ; setup lookup tables + + jsr hgr_make_tables + + ;======================= + ; init graphics + jsr HGR2 - ldx #7 - jsr HCOLOR1 + ;======================= + ; init variables lda #0 sta XL @@ -70,45 +95,64 @@ bubble: sta TL sta TH + ;========================= + ;========================= + ; main loop + ;========================= + ;========================= + next_frame: - lda #0 - jsr BKGND0 + ;=========================== + ; "fast" clear screen + ; inline to save 12 cycles -main_loop: + jsr hgr_clear_screen - ; clear screen: TODO + ldx #0 ; 2 + stx I ; 3 - ldx #0 - stx I outer_loop: - ldx #0 - stx J + ; setup R*I to inner loop + ; save NUM*4 (128) cycles at expense of 11 cycles + + ldx I ; 3 + lda rl,X ; 4 + sta rl_smc+1 ; 4 + + ldx #NUM ; 2 + stx J ; 3 inner_loop: ; fixed_add(rh[i],rl[i],xh,xl,&rxh,&rxl); - ldx I ; 3 + ; note: rh is always 0 + + ; pre-calc (R*I)+X for later use + clc ; 2 - lda rl,X +rl_smc: + lda #0 ; R*I ; 2 adc XL ; 3 sta RXL ; 3 - lda rh,X + lda #0 ; 2 adc XH ; 3 sta RXH ; 3 ; fixed_add(i,0,vh,vl,&ivh,&ivl); - clc - lda #0 - adc VL - sta IVL - lda I - adc VH - sta IVH + + ; precalc I+V for later use + ; this is 8.8 fixed point 
so bottom byte of I is 0 + clc ; 2 ; C is not guaranteed 0 here: adc XH above carries out when XH=$FF + lda VL ; 3 + sta IVL ; 3 + lda I ; 3 + adc VH ; 3 + sta IVH ; 3 ; U=SIN(I+V)+SIN(RR+X) - ; float_to_fixed(sin(ivh,ivl) + sin(rxh,rxl), &uh,&ul); ldy #0 jsr sin @@ -124,7 +168,6 @@ inner_loop: sta UH ; V=COS(I+V)+COS(RR+X) - ; float_to_fixed(cos(ivh,ivl) + cos(rxh,rxl), &vh,&vl); ldy #0 jsr cos @@ -141,37 +184,52 @@ inner_loop: ; X=U+T - ; fixed_add(uh,ul,th,tl,&xh,&xl); - clc - lda UL - adc TL - sta XL - lda UH - adc TH - sta XH + clc ; 2 + lda UL ; 3 + adc TL ; 3 + sta XL ; 3 + lda UH ; 3 + adc TH ; 3 + sta XH ; 3 ; HPLOT 32*U+140,32*V+96 - ; hplot(48*fixed_to_float(uh,ul)+140, - ; 48*fixed_to_float(vh,vl)+96); - ; HPLOT0 plot at (Y,X), (A) + ; U can be destroyed as we don't use it again? - lda UL - sta HPLOTYL + ; 01234567 89ABCDEF + ; 56789ABC DEF00000 + + ; we want 56789ABC, rotate right by 3 is two iterations faster? + +; lda UL +; sta HPLOTYL + + lda UL ; 3 + + lsr UH ; 5 + ror ; 2 + + lsr UH ; 5 + ror ; 2 + + lsr UH ; 5 + ror ; 2 + +.if 0 lda UH - asl HPLOTYL + asl UL rol - asl HPLOTYL + asl UL rol - asl HPLOTYL + asl UL rol - asl HPLOTYL + asl UL rol - asl HPLOTYL + asl UL rol - +.endif clc adc #140 tax @@ -195,17 +253,31 @@ inner_loop: clc adc #96 + ; "fast" hplot, Xpos in X, Ypos in A - ldy #0 ; never bigger than 140+48 = 188 -; ldx #140 -; lda #96 - jsr HPLOT0 + tay ; 2 + lda hposn_low,Y ; 4 + sta GBASL ; 3 + lda hposn_high,Y ; 4 + and #$1F ; 2 ; drop the page-1 base ($20) that build_hposn_tables ORs into the table + ora HGR_PAGE ; 3 ; select the draw page ($20 or $40), same page hgr_clear_screen clears + sta GBASH ; 3 +; 21 - inc J - lda J - cmp #NUM - beq done_j - jmp inner_loop + ldy div7_table,X ; 4 + + lda mod7_table,X ; 4 + tax ; 2 +; 31 + lda (GBASL),Y ; 5 + ora log_lookup,X ; 4 + sta (GBASL),Y ; 6 +; 46 + + dec J ; 5 + beq done_j ; 2/3 ; J counts NUM..1, exactly NUM passes (bmi ran NUM+1) + jmp inner_loop ; 3 +; bne inner_loop done_j: inc I @@ -329,12 +401,6 @@ cos: jmp already_loaded -rh: -.byte $00,$00,$00,$00,$00,$00,$00,$00 -.byte $00,$00,$00,$00,$00,$00,$00,$00 -.byte $00,$00,$00,$00,$00,$00,$00,$00 -.byte $00,$00,$00,$00,$00,$00,$00,$00 - rl: .byte
$00,$06,$0C,$12,$19,$1F,$25,$2B .byte $32,$38,$3E,$45,$4B,$51,$57,$5E @@ -359,3 +425,8 @@ sin_lookup: .byte $A6,$A8,$AA,$AD,$AF,$B1,$B4,$B6,$B9,$BC,$BE,$C1,$C4,$C7,$C9,$CC .byte $CF,$D2,$D5,$D8,$DB,$DE,$E1,$E4,$E7,$EA,$ED,$F0,$F4,$F7,$FA,$FD +log_lookup: + .byte $81,$82,$84,$88,$90,$A0,$C0,$80 + +.include "hgr_clear_screen.s" +.include "hgr_table.s" diff --git a/graphics/hgr/bubble/bubble_orig.s b/graphics/hgr/bubble/bubble_orig.s new file mode 100644 index 00000000..5bbf1397 --- /dev/null +++ b/graphics/hgr/bubble/bubble_orig.s @@ -0,0 +1,360 @@ +; bubble universe -- Apple II Hires + +; soft-switches + +KEYPRESS = $C000 +KEYRESET = $C010 +PAGE1 = $C054 +PAGE2 = $C055 + +; ROM routines + +BKGND0 = $F3F4 ; clear current page to A +HGR2 = $F3D8 ; set hires page2 and clear $4000-$5fff +HGR = $F3E2 ; set hires page1 and clear $2000-$3fff +HPLOT0 = $F457 ; plot at (Y,X), (A) +HLINRL = $F530 ; line to (X,A), (Y) +HCOLOR1 = $F6F0 ; set HGR_COLOR to value in X +COLORTBL = $F6F6 +WAIT = $FCA8 ; delay 1/2(26+27A+5A^2) us + +; zero page + +HPLOTXL = $90 +HPLOTXH = $91 +HPLOTYL = $92 +HPLOTYH = $93 +IVL = $94 +IVH = $95 +RXL = $96 +RXH = $97 +OUT1L = $98 +OUT1H = $99 +OUT2L = $9A +OUT2H = $9B +STEMP1L = $9C +STEMP1H = $9D +STEMP2L = $9E +STEMP2H = $9F + +I = $D0 +J = $D1 +XL = $D4 +XH = $D5 +VL = $D6 +VH = $D7 +TL = $DA +TH = $DB +UL = $DC +UH = $DD + +HGR_PAGE = $E6 + +; const + +NUM = 32 + +bubble: + + jsr HGR2 + + ldx #7 + jsr HCOLOR1 + + lda #0 + sta XL + sta XH + sta VL + sta VH + sta TL + sta TH + +next_frame: + + lda #0 + jsr BKGND0 + +main_loop: + + ; clear screen: TODO + + ldx #0 + stx I +outer_loop: + + ldx #0 + stx J + +inner_loop: + + ; fixed_add(rh[i],rl[i],xh,xl,&rxh,&rxl); + ldx I ; 3 + clc ; 2 + lda rl,X + adc XL ; 3 + sta RXL ; 3 + lda rh,X + adc XH ; 3 + sta RXH ; 3 + + ; fixed_add(i,0,vh,vl,&ivh,&ivl); + clc + lda #0 + adc VL + sta IVL + lda I + adc VH + sta IVH + + ; U=SIN(I+V)+SIN(RR+X) + ; float_to_fixed(sin(ivh,ivl) + sin(rxh,rxl), &uh,&ul); + + ldy #0 + 
jsr sin + ldy #2 + jsr sin + + clc + lda OUT1L + adc OUT2L + sta UL + lda OUT1H + adc OUT2H + sta UH + + ; V=COS(I+V)+COS(RR+X) + ; float_to_fixed(cos(ivh,ivl) + cos(rxh,rxl), &vh,&vl); + + ldy #0 + jsr cos + ldy #2 + jsr cos + + clc + lda OUT1L + adc OUT2L + sta VL + lda OUT1H + adc OUT2H + sta VH + + + ; X=U+T + ; fixed_add(uh,ul,th,tl,&xh,&xl); + clc + lda UL + adc TL + sta XL + lda UH + adc TH + sta XH + + ; HPLOT 32*U+140,32*V+96 + ; hplot(48*fixed_to_float(uh,ul)+140, + ; 48*fixed_to_float(vh,vl)+96); + + ; HPLOT0 plot at (Y,X), (A) + + lda UL + sta HPLOTYL + + lda UH + + asl HPLOTYL + rol + asl HPLOTYL + rol + asl HPLOTYL + rol + asl HPLOTYL + rol + asl HPLOTYL + rol + + clc + adc #140 + tax + + lda VL + sta HPLOTYL + + lda VH + + asl HPLOTYL + rol + asl HPLOTYL + rol + asl HPLOTYL + rol + asl HPLOTYL + rol + asl HPLOTYL + rol + + clc + adc #96 + + + ldy #0 ; never bigger than 140+48 = 188 +; ldx #140 +; lda #96 + jsr HPLOT0 + + inc J + lda J + cmp #NUM + beq done_j + jmp inner_loop +done_j: + + inc I + lda I + cmp #NUM + beq done_i + jmp outer_loop +done_i: + + ; t=t+(1.0/32.0); + ; 1/2 1/4 1/8 1/16 | 1/32 1/64 1/128 1/256 + ; $0x08 + + clc + lda TL + adc #$8 + sta TL + lda #0 + adc TH + sta TH + +end: + ; flip pages + + ; if $20 (draw PAGE1) draw PAGE2, SHOW page1 + ; if $40 (draw PAGE2) draw PAGE1, SHOW page2 + + lda HGR_PAGE + eor #$60 + sta HGR_PAGE + + cmp #$40 + bne flip2 +flip1: + bit PAGE1 + jmp next_frame +flip2: + bit PAGE2 + jmp next_frame + + + + + ;======================= +sin: + + ; / 6.28 is roughly the same as *0.16 + ; = .5 .25 .125 .0625 .03125 + ; 1/6.28 = 0.16 = 0 0 1 0 1 0 0 0 = 0x28 + + ; i=(i*0x28)>>8; + + lda IVL,Y + sta STEMP1L + lda IVH,Y + sta STEMP1H +already_loaded: + ; i2=i<<3; + + asl STEMP1L + rol STEMP1H + asl STEMP1L + rol STEMP1H + asl STEMP1L + rol STEMP1H + + ; i1=i<<5; + + lda STEMP1L + sta STEMP2L + lda STEMP1H + sta STEMP2H + + asl STEMP2L + rol STEMP2H + asl STEMP2L + rol STEMP2H + + ; i=(i1+i2)>>8; + + clc + lda 
STEMP1L + adc STEMP2L + sta STEMP1L + + lda STEMP1H + adc STEMP2H + sta STEMP1H + + ldx STEMP1H + + ; sl=fsinh[i]; + + lda sin_lookup,X + asl + sta OUT1L,Y + + bcs sin_negative +sin_positive: + lda #$0 + beq set_sin_sign +sin_negative: + lda #$FF +set_sin_sign: + sta OUT1H,Y + + rts + + ;============================= +cos: + ; 1.57 is roughly 0x0192 in 8.8 + + clc + lda IVL,Y + adc #$92 + sta STEMP1L + lda IVH,Y + adc #1 + sta STEMP1H + + jmp already_loaded + + +rh: +.byte $00,$00,$00,$00,$00,$00,$00,$00 +.byte $00,$00,$00,$00,$00,$00,$00,$00 +.byte $00,$00,$00,$00,$00,$00,$00,$00 +.byte $00,$00,$00,$00,$00,$00,$00,$00 + +rl: +.byte $00,$06,$0C,$12,$19,$1F,$25,$2B +.byte $32,$38,$3E,$45,$4B,$51,$57,$5E +.byte $64,$6A,$71,$77,$7D,$83,$8A,$90 +.byte $96,$9D,$A3,$A9,$AF,$B6,$BC,$C2 + +sin_lookup: +.byte $00,$03,$06,$09,$0C,$0F,$12,$15,$18,$1C,$1F,$22,$25,$28,$2B,$2E +.byte $30,$33,$36,$39,$3C,$3F,$41,$44,$47,$49,$4C,$4E,$51,$53,$55,$58 +.byte $5A,$5C,$5E,$60,$62,$64,$66,$68,$6A,$6C,$6D,$6F,$70,$72,$73,$74 +.byte $76,$77,$78,$79,$7A,$7B,$7C,$7C,$7D,$7E,$7E,$7F,$7F,$7F,$7F,$7F +.byte $7F,$7F,$7F,$7F,$7F,$7F,$7E,$7E,$7D,$7C,$7C,$7B,$7A,$79,$78,$77 +.byte $76,$75,$73,$72,$70,$6F,$6D,$6C,$6A,$68,$66,$64,$63,$61,$5E,$5C +.byte $5A,$58,$56,$53,$51,$4E,$4C,$49,$47,$44,$41,$3F,$3C,$39,$36,$34 +.byte $31,$2E,$2B,$28,$25,$22,$1F,$1C,$19,$16,$12,$0F,$0C,$09,$06,$03 +.byte $00,$FE,$FA,$F7,$F4,$F1,$EE,$EB,$E8,$E5,$E2,$DF,$DC,$D9,$D6,$D3 +.byte $D0,$CD,$CA,$C7,$C4,$C2,$BF,$BC,$BA,$B7,$B4,$B2,$AF,$AD,$AB,$A8 +.byte $A6,$A4,$A2,$A0,$9E,$9C,$9A,$98,$96,$95,$93,$91,$90,$8E,$8D,$8C +.byte $8A,$89,$88,$87,$86,$85,$84,$84,$83,$82,$82,$81,$81,$81,$81,$81 +.byte $81,$81,$81,$81,$81,$81,$82,$82,$83,$84,$84,$85,$86,$87,$88,$89 +.byte $8A,$8B,$8D,$8E,$8F,$91,$93,$94,$96,$98,$99,$9B,$9D,$9F,$A1,$A4 +.byte $A6,$A8,$AA,$AD,$AF,$B1,$B4,$B6,$B9,$BC,$BE,$C1,$C4,$C7,$C9,$CC +.byte $CF,$D2,$D5,$D8,$DB,$DE,$E1,$E4,$E7,$EA,$ED,$F0,$F4,$F7,$FA,$FD + diff --git a/graphics/hgr/bubble/hgr_clear_screen.s 
b/graphics/hgr/bubble/hgr_clear_screen.s new file mode 100644 index 00000000..010a3f4d --- /dev/null +++ b/graphics/hgr/bubble/hgr_clear_screen.s @@ -0,0 +1,91 @@ +hgr_clear_screen: + + lda #0 ; color + + ldy HGR_PAGE + cpy #$40 + beq hgr_page2_clearscreen + +hgr_page1_clearscreen: + + ldy #0 +hgr_page1_cls_loop: + sta $2000,Y + sta $2100,Y + sta $2200,Y + sta $2300,Y + sta $2400,Y + sta $2500,Y + sta $2600,Y + sta $2700,Y + sta $2800,Y + sta $2900,Y + sta $2A00,Y + sta $2B00,Y + sta $2C00,Y + sta $2D00,Y + sta $2E00,Y + sta $2F00,Y + sta $3000,Y + sta $3100,Y + sta $3200,Y + sta $3300,Y + sta $3400,Y + sta $3500,Y + sta $3600,Y + sta $3700,Y + sta $3800,Y + sta $3900,Y + sta $3A00,Y + sta $3B00,Y + sta $3C00,Y + sta $3D00,Y + sta $3E00,Y + sta $3F00,Y + iny + bne hgr_page1_cls_loop + + rts + + +hgr_page2_clearscreen: + + ldy #0 +hgr_page2_cls_loop: + sta $4000,Y + sta $4100,Y + sta $4200,Y + sta $4300,Y + sta $4400,Y + sta $4500,Y + sta $4600,Y + sta $4700,Y + sta $4800,Y + sta $4900,Y + sta $4A00,Y + sta $4B00,Y + sta $4C00,Y + sta $4D00,Y + sta $4E00,Y + sta $4F00,Y + sta $5000,Y + sta $5100,Y + sta $5200,Y + sta $5300,Y + sta $5400,Y + sta $5500,Y + sta $5600,Y + sta $5700,Y + sta $5800,Y + sta $5900,Y + sta $5A00,Y + sta $5B00,Y + sta $5C00,Y + sta $5D00,Y + sta $5E00,Y + sta $5F00,Y + iny + bne hgr_page2_cls_loop + + rts + diff --git a/graphics/hgr/bubble/hgr_table.s b/graphics/hgr/bubble/hgr_table.s new file mode 100644 index 00000000..2a6d5a99 --- /dev/null +++ b/graphics/hgr/bubble/hgr_table.s @@ -0,0 +1,97 @@ +;div7_table = $b800 +;mod7_table = $b900 +;hposn_high = $ba00 +;hposn_low = $bb00 + + +hgr_make_tables: + + ;===================== + ; make /7 %7 tables + ;===================== + +hgr_make_7_tables: + + ldy #0 + lda #0 + ldx #0 +div7_loop: + sta div7_table,Y + + inx + cpx #7 + bne div7_not7 + + clc + adc #1 + ldx #0 +div7_not7: + iny + bne div7_loop + + + ldy #0 + lda #0 +mod7_loop: + sta mod7_table,Y + clc + adc #1 + cmp #7 + bne mod7_not7 + lda 
#0 +mod7_not7: + iny + bne mod7_loop + + + ; Hposn table + +; hposn_low, hposn_high will each be filled with $C0 bytes +; based on routine by John Brooks +; posted on comp.sys.apple2 on 2018-07-11 +; https://groups.google.com/d/msg/comp.sys.apple2/v2HOfHOmeNQ/zD76fJg_BAAJ +; clobbers A,X +; preserves Y + +; vmw note: version I was using based on applesoft HPOSN was ~64 bytes +; this one is 37 bytes + +build_hposn_tables: + ldx #0 +btmi: + txa + and #$F8 + bpl btpl1 + ora #5 +btpl1: + asl + bpl btpl2 + ora #5 +btpl2: + asl + asl + sta hposn_low, X + txa + and #7 + rol + asl hposn_low, X + rol + ora #$20 + sta hposn_high, X + inx + cpx #$C0 + bne btmi + +; go 16 beyond, which allows our text scrolling routine + + ldx #16 +extra_table_loop: + lda hposn_low,X + sta hposn_low+192,X + lda hposn_high,X + eor #$60 + sta hposn_high+192,X + dex + bpl extra_table_loop + + rts