From 1a6d39d63fb4ebfcec296258fcba43d0f93aa81a Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Tue, 2 Jan 2024 16:20:43 -0500 Subject: [PATCH] bubble: optimize more --- graphics/hgr/bubble/Makefile | 17 +- graphics/hgr/bubble/bubble.s | 149 +++++---- graphics/hgr/bubble/bubble_rolled.s | 455 +++++++++++++++++++++++++ graphics/hgr/bubble/hgr_clear_part.s | 480 +++++++++++++++++++++++++++ graphics/hgr/bubble/sin_unrolled.s | 57 ++++ 5 files changed, 1087 insertions(+), 71 deletions(-) create mode 100644 graphics/hgr/bubble/bubble_rolled.s create mode 100644 graphics/hgr/bubble/hgr_clear_part.s create mode 100644 graphics/hgr/bubble/sin_unrolled.s diff --git a/graphics/hgr/bubble/Makefile b/graphics/hgr/bubble/Makefile index 7138c662..9457d0b9 100644 --- a/graphics/hgr/bubble/Makefile +++ b/graphics/hgr/bubble/Makefile @@ -7,12 +7,13 @@ EMPTY_DISK = ../../../empty_disk all: bubble.dsk make_table -bubble.dsk: HELLO BUBBLE.BAS BUBBLE BUBBLE_ORIG DIAMOND +bubble.dsk: HELLO BUBBLE.BAS BUBBLE BUBBLE_ORIG BUBBLE_ROLLED DIAMOND cp $(EMPTY_DISK)/empty.dsk bubble.dsk $(DOS33) -y bubble.dsk SAVE A HELLO $(DOS33) -y bubble.dsk SAVE A BUBBLE.BAS $(DOS33) -y bubble.dsk BSAVE -a 0x0C00 BUBBLE $(DOS33) -y bubble.dsk BSAVE -a 0x0C00 BUBBLE_ORIG + $(DOS33) -y bubble.dsk BSAVE -a 0x0C00 BUBBLE_ROLLED $(DOS33) -y bubble.dsk BSAVE -a 0x0C00 DIAMOND ### @@ -25,7 +26,7 @@ HELLO: hello.bas BUBBLE: bubble.o ld65 -o BUBBLE bubble.o -C $(LINKER_SCRIPTS)/apple2_c00.inc -bubble.o: bubble.s hgr_clear_screen.s +bubble.o: bubble.s hgr_clear_part.s hgr_table.s sin_unrolled.s ca65 -o bubble.o bubble.s -l bubble.lst ### @@ -36,6 +37,15 @@ BUBBLE_ORIG: bubble_orig.o bubble_orig.o: bubble_orig.s ca65 -o bubble_orig.o bubble_orig.s -l bubble_orig.lst +### + +BUBBLE_ROLLED: bubble_rolled.o + ld65 -o BUBBLE_ROLLED bubble_rolled.o -C $(LINKER_SCRIPTS)/apple2_c00.inc + +bubble_rolled.o: bubble_rolled.s hgr_clear_part.s hgr_table.s + ca65 -o bubble_rolled.o bubble_rolled.s -l bubble_rolled.lst + + ### @@ -63,4 +73,5 @@ make_table.o: make_table.c 
### clean: - rm -f *~ *.o *.lst BUBBLE.BAS HELLO BUBBLE DIAMOND BUBBLE_ORIG + rm -f *~ *.o *.lst BUBBLE.BAS HELLO BUBBLE \ + DIAMOND BUBBLE_ORIG BUBBLE_ROLLED diff --git a/graphics/hgr/bubble/bubble.s b/graphics/hgr/bubble/bubble.s index f496ddf1..a8de6365 100644 --- a/graphics/hgr/bubble/bubble.s +++ b/graphics/hgr/bubble/bubble.s @@ -10,8 +10,8 @@ ; after fast graphics -; D7E77 = 884343 = 1.1fps -; DD06E = ?? (made J countdown, why longer?) +; D7E77(??) = 884343 = 1.1fps +; DD06E = (made J countdown, why longer?) ; DB584 = destructive U when plotting ; D57A2 = rotate right instead of left for HPLOT *32 (U) ; D1D53 = same byt for V @@ -19,8 +19,12 @@ ; AB2FC = optimize sine, keep H value in accumulator = 1.4fps ; A9A38 = optimize cosine slightly ; A50BF = use lookup table for sine sign (takes 256 more bytes) +; 9F673 = clear screen, only clear X region we use +; 9DD73 = clear screen, only clear Y region we use +; 906FE = inline/unroll the sines ; TODO: inline/unroll sine/cosine calls + ; soft-switches KEYPRESS = $C000 @@ -89,6 +93,7 @@ bubble: ;======================= ; init graphics + jsr HGR jsr HGR2 ;======================= @@ -112,15 +117,18 @@ next_frame: ;=========================== ; "fast" clear screen - ; inline to save 12 cycles + ; FIXME: inline to save 12 cycles - jsr hgr_clear_screen + jsr hgr_clear_part + + ; FIXME: see value of X after clear ldx #0 ; 2 stx I ; 3 outer_loop: + ;=========================================================== ; setup R*I to inner loop ; save NUM*4 (128) cycles at expense of 11 cycles @@ -128,6 +136,8 @@ outer_loop: lda rl,X ; 4 sta rl_smc+1 ; 4 + ; countdown NUM times + ldx #NUM ; 2 stx J ; 3 @@ -143,10 +153,17 @@ rl_smc: lda #0 ; R*I ; 2 adc XL ; 3 sta RXL ; 3 + + ; 8 cycles to always add 0 for high byte + ; since we're also copying can't save time by branching if no carry + ; 3 if no carry / 2+5=7 if carry for branch/check + lda #0 ; 2 adc XH ; 3 sta RXH ; 3 +no_rl_carry: + ; fixed_add(i,0,vh,vl,&ivh,&ivl); ; precalc I+V 
for later use @@ -161,17 +178,45 @@ rl_smc: ; U=SIN(I+V)+SIN(RR+X) - ldy #0 - jsr sin - ldy #2 - jsr sin +; ldy #0 + lda IVL ; 3 + sta STEMP1L ; 3 + lda IVH ; 3 + +; jsr sin +.include "sin_unrolled.s" + + lda sin_table_low,X ; 4 + sta OUT1L ; 3 + lda sin_table_high,X ; 4 + sta OUT1H ; 3 + + + + + + + +; ldy #2 + lda RXL ; 3 + sta STEMP1L ; 3 + lda RXH ; 3 + +; jsr sin + +.include "sin_unrolled.s" + +; lda sin_table_low,X ; 4 +; sta OUT1L ; 3 +; lda sin_table_high,X ; 4 +; sta OUT1H ; 3 clc lda OUT1L - adc OUT2L + adc sin_table_low,X sta UL lda OUT1H - adc OUT2H + adc sin_table_high,X sta UH ; V=COS(I+V)+COS(RR+X) @@ -269,23 +314,25 @@ rl_smc: ; bpl inner_loop done_j: - inc I - lda I - cmp #NUM - beq done_i - jmp outer_loop + inc I ; 5 + lda I ; 3 + cmp #NUM ; 2 + beq done_i ; 2/3 + jmp outer_loop ; 3 done_i: ; t=t+(1.0/32.0); ; 1/2 1/4 1/8 1/16 | 1/32 1/64 1/128 1/256 ; $0x08 - ; TODO: is CLC necessary? (bcs=bge, bcc=blt) - ; carry always set here + ; carry always set here as we got here from a BEQ + ; (bcs=bge, bcc=blt) - clc ; 2 + +; clc ; 2 lda TL ; 3 - adc #$8 ; 2 +; adc #$8 ; 2 + adc #$7 ; really 8, carry always set ; 2 sta TL ; 3 lda #0 ; 2 adc TH ; 3 @@ -360,29 +407,14 @@ already_loaded: ; sl=fsinh[i]; - ; TODO: tradeoff size for speed by having lookup + ; tradeoff size for speed by having lookup ; table for sign bits + ; the sign lookup only saves like 2 cycles - lda sin_table_low,X - sta OUT1L,Y - lda sin_table_high,X - sta OUT1H,Y - -.if 0 - - lda sin_lookup,X ; 4+ - asl ; 2 + lda sin_table_low,X ; 4+ sta OUT1L,Y ; 5 - - bcs sin_negative ; 2/3 -sin_positive: - lda #$0 ; 2 - beq set_sin_sign ; bra ; 3 -sin_negative: - lda #$FF ; 2 -set_sin_sign: + lda sin_table_high,X ; 4+ sta OUT1H,Y ; 5 -.endif rts ; 6 @@ -402,36 +434,7 @@ cos: jmp already_loaded ; 3 -rl: -.byte $00,$06,$0C,$12,$19,$1F,$25,$2B -.byte $32,$38,$3E,$45,$4B,$51,$57,$5E -.byte $64,$6A,$71,$77,$7D,$83,$8A,$90 -.byte $96,$9D,$A3,$A9,$AF,$B6,$BC,$C2 - -.if 0 -sin_lookup: -.byte 
$00,$03,$06,$09,$0C,$0F,$12,$15,$18,$1C,$1F,$22,$25,$28,$2B,$2E -.byte $30,$33,$36,$39,$3C,$3F,$41,$44,$47,$49,$4C,$4E,$51,$53,$55,$58 -.byte $5A,$5C,$5E,$60,$62,$64,$66,$68,$6A,$6C,$6D,$6F,$70,$72,$73,$74 -.byte $76,$77,$78,$79,$7A,$7B,$7C,$7C,$7D,$7E,$7E,$7F,$7F,$7F,$7F,$7F -.byte $7F,$7F,$7F,$7F,$7F,$7F,$7E,$7E,$7D,$7C,$7C,$7B,$7A,$79,$78,$77 -.byte $76,$75,$73,$72,$70,$6F,$6D,$6C,$6A,$68,$66,$64,$63,$61,$5E,$5C -.byte $5A,$58,$56,$53,$51,$4E,$4C,$49,$47,$44,$41,$3F,$3C,$39,$36,$34 -.byte $31,$2E,$2B,$28,$25,$22,$1F,$1C,$19,$16,$12,$0F,$0C,$09,$06,$03 -.byte $00,$FE,$FA,$F7,$F4,$F1,$EE,$EB,$E8,$E5,$E2,$DF,$DC,$D9,$D6,$D3 -.byte $D0,$CD,$CA,$C7,$C4,$C2,$BF,$BC,$BA,$B7,$B4,$B2,$AF,$AD,$AB,$A8 -.byte $A6,$A4,$A2,$A0,$9E,$9C,$9A,$98,$96,$95,$93,$91,$90,$8E,$8D,$8C -.byte $8A,$89,$88,$87,$86,$85,$84,$84,$83,$82,$82,$81,$81,$81,$81,$81 -.byte $81,$81,$81,$81,$81,$81,$82,$82,$83,$84,$84,$85,$86,$87,$88,$89 -.byte $8A,$8B,$8D,$8E,$8F,$91,$93,$94,$96,$98,$99,$9B,$9D,$9F,$A1,$A4 -.byte $A6,$A8,$AA,$AD,$AF,$B1,$B4,$B6,$B9,$BC,$BE,$C1,$C4,$C7,$C9,$CC -.byte $CF,$D2,$D5,$D8,$DB,$DE,$E1,$E4,$E7,$EA,$ED,$F0,$F4,$F7,$FA,$FD -.endif - -log_lookup: - .byte $81,$82,$84,$88,$90,$A0,$C0,$80 - -.include "hgr_clear_screen.s" +.include "hgr_clear_part.s" .include "hgr_table.s" .align $100 @@ -469,3 +472,13 @@ sin_table_high: .byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF .byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF .byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF + +rl: +.byte $00,$06,$0C,$12,$19,$1F,$25,$2B +.byte $32,$38,$3E,$45,$4B,$51,$57,$5E +.byte $64,$6A,$71,$77,$7D,$83,$8A,$90 +.byte $96,$9D,$A3,$A9,$AF,$B6,$BC,$C2 + +log_lookup: + .byte $81,$82,$84,$88,$90,$A0,$C0,$80 + diff --git a/graphics/hgr/bubble/bubble_rolled.s b/graphics/hgr/bubble/bubble_rolled.s new file mode 100644 index 00000000..5ad5dc8b --- /dev/null +++ b/graphics/hgr/bubble/bubble_rolled.s @@ -0,0 +1,455 @@ +; bubble universe -- Apple II Hires 
+ +; original = 612 bytes +; clear screen: +; bkgnd0 = $44198 = 278936 cycles = max ~4fps +; new: $A616 = 42518 = max ~22fps +; hplot +; hplot0 = ($14E-$15C) $14E = 334 * 1024 = 342016 = max ~3fps +; lookup = 46 * 1024 = 47104 = max ~21fps + + +; after fast graphics +; D7E77(??) = 884343 = 1.1fps +; DD06E = (made J countdown; likely longer because dec/bmi from NUM runs NUM+1 passes) +; DB584 = destructive U when plotting +; D57A2 = rotate right instead of left for HPLOT *32 (U) +; D1D53 = same but for V +; C2679 = optimize sine, don't care about bottom byte in addition +; AB2FC = optimize sine, keep H value in accumulator = 1.4fps +; A9A38 = optimize cosine slightly +; A50BF = use lookup table for sine sign (takes 256 more bytes) +; 9F673 = clear screen, only clear X region we use +; 9DD73 = clear screen, only clear Y region we use +; TODO: inline/unroll sine/cosine calls + + +; soft-switches + +KEYPRESS = $C000 +KEYRESET = $C010 +PAGE1 = $C054 +PAGE2 = $C055 + +; ROM routines + +BKGND0 = $F3F4 ; clear current page to A +HGR2 = $F3D8 ; set hires page2 and clear $4000-$5fff +HGR = $F3E2 ; set hires page1 and clear $2000-$3fff +HPLOT0 = $F457 ; plot at (Y,X), (A) +HLINRL = $F530 ; line to (X,A), (Y) +HCOLOR1 = $F6F0 ; set HGR_COLOR to value in X +COLORTBL = $F6F6 +WAIT = $FCA8 ; delay 1/2(26+27A+5A^2) us + +; zero page + +GBASL = $26 +GBASH = $27 + + +HPLOTXL = $90 +HPLOTXH = $91 +HPLOTYL = $92 +HPLOTYH = $93 +IVL = $94 +IVH = $95 +RXL = $96 +RXH = $97 +OUT1L = $98 +OUT1H = $99 +OUT2L = $9A +OUT2H = $9B +STEMP1L = $9C +STEMP1H = $9D +STEMP2L = $9E +STEMP2H = $9F + +I = $D0 +J = $D1 +XL = $D4 +XH = $D5 +VL = $D6 +VH = $D7 +TL = $DA +TH = $DB +UL = $DC +UH = $DD + +HGR_PAGE = $E6 + +; const + +NUM = 32 + +bubble: + + ;======================== + ; setup lookup tables + + jsr hgr_make_tables + + ;======================= + ; init graphics + + jsr HGR + jsr HGR2 + + ;======================= + ; init variables + + lda #0 + sta XL + sta XH + sta VL + sta VH + sta TL + sta TH + + ;========================= + 
;========================= + ; main loop + ;========================= + ;========================= + +next_frame: + + ;=========================== + ; "fast" clear screen + ; FIXME: inline to save 12 cycles + + jsr hgr_clear_part + + ; FIXME: see value of X after clear + + ldx #0 ; 2 + stx I ; 3 + +outer_loop: + + ;=========================================================== + ; setup R*I to inner loop + ; save NUM*4 (128) cycles at expense of 11 cycles + + ldx I ; 3 + lda rl,X ; 4 + sta rl_smc+1 ; 4 + + ; countdown NUM times + + ldx #NUM ; 2 + stx J ; 3 + +inner_loop: + + ; fixed_add(rh[i],rl[i],xh,xl,&rxh,&rxl); + ; note: rh is always 0 + + ; pre-calc (R*I)+X for later use + + clc ; 2 +rl_smc: + lda #0 ; R*I ; 2 + adc XL ; 3 + sta RXL ; 3 + + ; 8 cycles to always add 0 for high byte + ; since we're also copying can't save time by branching if no carry + ; 3 if no carry / 2+5=7 if carry for branch/check + + lda #0 ; 2 + adc XH ; 3 + sta RXH ; 3 + +no_rl_carry: + + ; fixed_add(i,0,vh,vl,&ivh,&ivl); + + ; precalc I+V for later use + ; this is 8.8 fixed point so bottom byte of I is 0 + + clc ; needed: adc XH above carries out when XH=$FF with carry in ; 2 + lda VL ; 3 + sta IVL ; 3 + lda I ; 3 + adc VH ; 3 + sta IVH ; 3 + + ; U=SIN(I+V)+SIN(RR+X) + + ldy #0 + jsr sin + ldy #2 + jsr sin + + clc + lda OUT1L + adc OUT2L + sta UL + lda OUT1H + adc OUT2H + sta UH + + ; V=COS(I+V)+COS(RR+X) + + ldy #0 + jsr cos + ldy #2 + jsr cos + + clc + lda OUT1L + adc OUT2L + sta VL + lda OUT1H + adc OUT2H + sta VH + + + ; X=U+T + clc ; 2 + lda UL ; 3 + adc TL ; 3 + sta XL ; 3 + lda UH ; 3 + adc TH ; 3 + sta XH ; 3 + + ; HPLOT 32*U+140,32*V+96 + + ; U can be destroyed as we don't use it again? + + ; 01234567 89ABCDEF + + ; 56789ABC DEF00000 + + ; we want 56789ABC, rotate right by 3 is two iterations faster? 
+ + lda UL ; 3 + + lsr UH ; 5 + ror ; 2 + + lsr UH ; 5 + ror ; 2 + + lsr UH ; 5 + ror ; 2 + + clc ; 2 + adc #140 ; 2 + tax ; 2 + + ; calculate Ypos + + lda VH + sta HPLOTYL + lda VL + + lsr HPLOTYL + ror + + lsr HPLOTYL + ror + + lsr HPLOTYL + ror + + clc + adc #96 + + ; "fast" hplot, Xpos in X, Ypos in A + + tay ; 2 + lda hposn_low,Y ; 4 + sta GBASL ; 3 + clc ; 2 + lda hposn_high,Y ; 4 + adc HGR_PAGE ; 3 + sta GBASH ; 3 +; 21 + + ldy div7_table,X ; 4 + + lda mod7_table,X ; 4 + tax ; 2 +; 31 + lda (GBASL),Y ; 5 + ora log_lookup,X ; 4 + sta (GBASL),Y ; 6 +; 46 + + dec J ; 5 + beq done_j ; leave when J hits 0 so the body runs exactly NUM times ; 2/3 + jmp inner_loop ; 3 +; (was bmi done_j: J counted NUM..0, i.e. NUM+1 passes) +done_j: + + inc I ; 5 + lda I ; 3 + cmp #NUM ; 2 + beq done_i ; 2/3 + jmp outer_loop ; 3 +done_i: + + ; t=t+(1.0/32.0); + ; 1/2 1/4 1/8 1/16 | 1/32 1/64 1/128 1/256 + ; $0x08 + + ; carry always set here as we got here from a BEQ + ; (bcs=bge, bcc=blt) + + +; clc ; 2 + lda TL ; 3 +; adc #$8 ; 2 + adc #$7 ; really 8, carry always set ; 2 + sta TL ; 3 + lda #0 ; 2 + adc TH ; 3 + sta TH ; 3 + +end: + ; flip pages + + ; if $20 (draw PAGE1) draw PAGE2, SHOW page1 + ; if $40 (draw PAGE2) draw PAGE1, SHOW page2 + + lda HGR_PAGE + eor #$60 + sta HGR_PAGE + + cmp #$40 + bne flip2 +flip1: + bit PAGE1 + jmp next_frame +flip2: + bit PAGE2 + jmp next_frame + + + + + ;======================= +sin: + + ; / 6.28 is roughly the same as *0.16 + ; = .5 .25 .125 .0625 .03125 + ; 1/6.28 = 0.16 = 0 0 1 0 1 0 0 0 = 0x28 + + ; i=(i*0x28)>>8; + + lda IVL,Y ; note, uses absolute as no ZP equiv ; 4 + sta STEMP1L ; 3 + lda IVH,Y ; 4 +already_loaded: + ; A has STEMP1H + + ; i2=i<<3; + + + asl STEMP1L ; 5 + rol ; 2 + asl STEMP1L ; 5 + rol ; 2 + asl STEMP1L ; 5 + rol ; 2 + + ; i1=i<<5; + + ldx STEMP1L ; 3 + stx STEMP2L ; 3 + + sta STEMP1H ; 3 + + asl STEMP2L ; 5 + rol ; 2 + asl STEMP2L ; 5 + rol ; 2 + + ; i=(i1+i2)>>8; + + ; We ignore the low byte as we don't need it + ; possibly inaccurate as we don't clear carry? 
+ + adc STEMP1H ; 2 + tax ; 2 + + ; sl=fsinh[i]; + + ; tradeoff size for speed by having lookup + ; table for sign bits + ; the sign lookup only saves like 2 cycles + + lda sin_table_low,X ; 4+ + sta OUT1L,Y ; 5 + lda sin_table_high,X ; 4+ + sta OUT1H,Y ; 5 + + rts ; 6 + + ;============================= +cos: + ; 1.57 is roughly 0x0192 in 8.8 + + clc ; 2 + lda IVL,Y ; 4 + adc #$92 ; 2 + sta STEMP1L ; 3 + + lda IVH,Y ; 4 + adc #1 ; 2 +; sta STEMP1H ; 3 + + jmp already_loaded ; 3 + + +.include "hgr_clear_part.s" +.include "hgr_table.s" + +.align $100 +sin_table_low: + .byte $00,$06,$0C,$12,$19,$1F,$25,$2B,$31,$38,$3E,$44,$4A,$50,$56,$5C + .byte $61,$67,$6D,$73,$78,$7E,$83,$88,$8E,$93,$98,$9D,$A2,$A7,$AB,$B0 + .byte $B4,$B9,$BD,$C1,$C5,$C9,$CD,$D1,$D4,$D8,$DB,$DE,$E1,$E4,$E7,$E9 + .byte $EC,$EE,$F0,$F3,$F4,$F6,$F8,$F9,$FB,$FC,$FD,$FE,$FE,$FF,$FF,$FF + .byte $FF,$FF,$FF,$FF,$FE,$FE,$FD,$FC,$FB,$F9,$F8,$F6,$F5,$F3,$F1,$EE + .byte $EC,$EA,$E7,$E4,$E1,$DE,$DB,$D8,$D5,$D1,$CD,$C9,$C6,$C2,$BD,$B9 + .byte $B5,$B0,$AC,$A7,$A2,$9D,$98,$93,$8E,$89,$83,$7E,$78,$73,$6D,$68 + .byte $62,$5C,$56,$50,$4A,$44,$3E,$38,$32,$2C,$25,$1F,$19,$13,$0C,$06 + .byte $00,$FB,$F4,$EE,$E8,$E2,$DB,$D5,$CF,$C9,$C3,$BD,$B7,$B1,$AB,$A5 + .byte $9F,$99,$93,$8E,$88,$83,$7D,$78,$73,$6D,$68,$63,$5E,$5A,$55,$50 + .byte $4C,$47,$43,$3F,$3B,$37,$33,$30,$2C,$29,$25,$22,$1F,$1C,$19,$17 + .byte $14,$12,$10,$0E,$0C,$0A,$08,$07,$06,$04,$03,$02,$02,$01,$01,$01 + .byte $01,$01,$01,$01,$02,$02,$03,$04,$05,$07,$08,$0A,$0B,$0D,$0F,$11 + .byte $14,$16,$19,$1C,$1E,$21,$25,$28,$2B,$2F,$32,$36,$3A,$3E,$42,$47 + .byte $4B,$4F,$54,$59,$5E,$62,$67,$6C,$72,$77,$7C,$82,$87,$8D,$92,$98 + .byte $9E,$A4,$AA,$AF,$B5,$BB,$C2,$C8,$CE,$D4,$DA,$E0,$E7,$ED,$F3,$F9 +sin_table_high: + .byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .byte 
$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .byte $00,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF + .byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF + .byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF + .byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF + .byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF + .byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF + .byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF + .byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF + +rl: +.byte $00,$06,$0C,$12,$19,$1F,$25,$2B +.byte $32,$38,$3E,$45,$4B,$51,$57,$5E +.byte $64,$6A,$71,$77,$7D,$83,$8A,$90 +.byte $96,$9D,$A3,$A9,$AF,$B6,$BC,$C2 + +log_lookup: + .byte $81,$82,$84,$88,$90,$A0,$C0,$80 + diff --git a/graphics/hgr/bubble/hgr_clear_part.s b/graphics/hgr/bubble/hgr_clear_part.s new file mode 100644 index 00000000..2ba63b10 --- /dev/null +++ b/graphics/hgr/bubble/hgr_clear_part.s @@ -0,0 +1,480 @@ + ; we only draw to + ; Xpositions 140+-64 ( 76-204, /7 = 10 - 30) + ; Ypositions 96+-64 = 32 - 160 + +hgr_clear_part: + + lda #0 ; color + + ldy HGR_PAGE + cpy #$40 + bne hgr_page1_clearscreen + jmp hgr_page2_clearscreen + +hgr_page1_clearscreen: + + ldy #10 +hgr_page1_cls_loop: +.if 0 + sta $2000,Y ; 0 + sta $2400,Y ; 1 + sta $2800,Y ; 2 + sta $2C00,Y ; 3 + sta $3000,Y ; 4 + sta $3400,Y ; 5 + sta $3800,Y ; 6 + sta $3C00,Y ; 7 + + sta $2080,Y ; 8 + sta $2480,Y ; 9 + sta $2880,Y ; 10 + sta $2C80,Y ; 11 + sta $3080,Y ; 12 + sta $3480,Y ; 13 + sta $3880,Y ; 14 + sta $3C80,Y ; 15 + + sta $2100,Y ; 16 + sta $2500,Y ; 17 + sta $2900,Y ; 
18 + sta $2D00,Y ; 19 + sta $3100,Y ; 20 + sta $3500,Y ; 21 + sta $3900,Y ; 22 + sta $3D00,Y ; 23 + + sta $2180,Y ; 24 + sta $2580,Y ; 25 + sta $2980,Y ; 26 + sta $2D80,Y ; 27 + sta $3180,Y ; 28 + sta $3580,Y ; 29 + sta $3980,Y ; 30 + sta $3D80,Y ; 31 +.endif + sta $2200,Y ; 32 + sta $2600,Y ; 33 + sta $2A00,Y ; 34 + sta $2E00,Y ; 35 + sta $3200,Y ; 36 + sta $3600,Y ; 37 + sta $3A00,Y ; 38 + sta $3E00,Y ; 39 + + sta $2280,Y ; 40 + sta $2680,Y ; 41 + sta $2A80,Y ; 42 + sta $2E80,Y ; 43 + sta $3280,Y ; 44 + sta $3680,Y ; 45 + sta $3A80,Y ; 46 + sta $3E80,Y ; 47 + + sta $2300,Y ; 48 + sta $2700,Y ; 49 + sta $2B00,Y ; 50 + sta $2F00,Y ; 51 + sta $3300,Y ; 52 + sta $3700,Y ; 53 + sta $3B00,Y ; 54 + sta $3F00,Y ; 55 + + sta $2380,Y ; 56 + sta $2780,Y ; 57 + sta $2B80,Y ; 58 + sta $2F80,Y ; 59 + sta $3380,Y ; 60 + sta $3780,Y ; 61 + sta $3B80,Y ; 62 + sta $3F80,Y ; 63 + + ;======= + + sta $2028,Y ; 64 + sta $2428,Y ; 1 + sta $2828,Y ; 2 + sta $2C28,Y ; 3 + sta $3028,Y ; 4 + sta $3428,Y ; 5 + sta $3828,Y ; 6 + sta $3C28,Y ; 7 + + sta $20A8,Y ; 72 + sta $24A8,Y ; 9 + sta $28A8,Y ; 10 + sta $2CA8,Y ; 11 + sta $30A8,Y ; 12 + sta $34A8,Y ; 13 + sta $38A8,Y ; 14 + sta $3CA8,Y ; 15 + + sta $2128,Y ; 80 + sta $2528,Y ; 17 + sta $2928,Y ; 18 + sta $2D28,Y ; 19 + sta $3128,Y ; 20 + sta $3528,Y ; 21 + sta $3928,Y ; 22 + sta $3D28,Y ; 23 + + sta $21A8,Y ; 88 + sta $25A8,Y ; 25 + sta $29A8,Y ; 26 + sta $2DA8,Y ; 27 + sta $31A8,Y ; 28 + sta $35A8,Y ; 29 + sta $39A8,Y ; 30 + sta $3DA8,Y ; 31 + + sta $2228,Y ; 96 + sta $2628,Y ; 33 + sta $2A28,Y ; 34 + sta $2E28,Y ; 35 + sta $3228,Y ; 36 + sta $3628,Y ; 37 + sta $3A28,Y ; 38 + sta $3E28,Y ; 39 + + sta $22A8,Y ; 104 + sta $26A8,Y ; 41 + sta $2AA8,Y ; 42 + sta $2EA8,Y ; 43 + sta $32A8,Y ; 44 + sta $36A8,Y ; 45 + sta $3AA8,Y ; 46 + sta $3EA8,Y ; 47 + + sta $2328,Y ; 112 + sta $2728,Y ; 49 + sta $2B28,Y ; 50 + sta $2F28,Y ; 51 + sta $3328,Y ; 52 + sta $3728,Y ; 53 + sta $3B28,Y ; 54 + sta $3F28,Y ; 55 + + sta $23A8,Y ; 120 + sta $27A8,Y ; 57 
+ sta $2BA8,Y ; 58 + sta $2FA8,Y ; 59 + sta $33A8,Y ; 60 + sta $37A8,Y ; 61 + sta $3BA8,Y ; 62 + sta $3FA8,Y ; 63 + + ;========= + + sta $2050,Y ; 128 + sta $2450,Y ; 1 + sta $2850,Y ; 2 + sta $2C50,Y ; 3 + sta $3050,Y ; 4 + sta $3450,Y ; 5 + sta $3850,Y ; 6 + sta $3C50,Y ; 7 + + sta $20D0,Y ; 136 + sta $24D0,Y ; 9 + sta $28D0,Y ; 10 + sta $2CD0,Y ; 11 + sta $30D0,Y ; 12 + sta $34D0,Y ; 13 + sta $38D0,Y ; 14 + sta $3CD0,Y ; 15 + + sta $2150,Y ; 144 + sta $2550,Y ; 17 + sta $2950,Y ; 18 + sta $2D50,Y ; 19 + sta $3150,Y ; 20 + sta $3550,Y ; 21 + sta $3950,Y ; 22 + sta $3D50,Y ; 23 + + sta $21D0,Y ; 152 + sta $25D0,Y ; 25 + sta $29D0,Y ; 26 + sta $2DD0,Y ; 27 + sta $31D0,Y ; 28 + sta $35D0,Y ; 29 + sta $39D0,Y ; 30 + sta $3DD0,Y ; 31 +.if 0 + sta $2250,Y ; 160 + sta $2650,Y ; 33 + sta $2A50,Y ; 34 + sta $2E50,Y ; 35 + sta $3250,Y ; 36 + sta $3650,Y ; 37 + sta $3A50,Y ; 38 + sta $3E50,Y ; 39 + + sta $22D0,Y ; 168 + sta $26D0,Y ; 41 + sta $2AD0,Y ; 42 + sta $2ED0,Y ; 43 + sta $32D0,Y ; 44 + sta $36D0,Y ; 45 + sta $3AD0,Y ; 46 + sta $3ED0,Y ; 47 + + sta $2350,Y ; 176 + sta $2750,Y ; 49 + sta $2B50,Y ; 50 + sta $2F50,Y ; 51 + sta $3350,Y ; 52 + sta $3750,Y ; 53 + sta $3B50,Y ; 54 + sta $3F50,Y ; 55 + + sta $23D0,Y ; 184 + sta $27D0,Y ; 57 + sta $2BD0,Y ; 58 + sta $2FD0,Y ; 59 + sta $33D0,Y ; 60 + sta $37D0,Y ; 61 + sta $3BD0,Y ; 62 + sta $3FD0,Y ; 63 +.endif + iny + cpy #30 + beq hgr_page1_cls_done + jmp hgr_page1_cls_loop + +hgr_page1_cls_done: + + rts + + +hgr_page2_clearscreen: + + ldy #10 +hgr_page2_cls_loop: +.if 0 + sta $4000,Y ; 0 + sta $4400,Y ; 1 + sta $4800,Y ; 2 + sta $4C00,Y ; 3 + sta $5000,Y ; 4 + sta $5400,Y ; 5 + sta $5800,Y ; 6 + sta $5C00,Y ; 7 + + sta $4080,Y ; 8 + sta $4480,Y ; 9 + sta $4880,Y ; 10 + sta $4C80,Y ; 11 + sta $5080,Y ; 12 + sta $5480,Y ; 13 + sta $5880,Y ; 14 + sta $5C80,Y ; 15 + + sta $4100,Y ; 16 + sta $4500,Y ; 17 + sta $4900,Y ; 18 + sta $4D00,Y ; 19 + sta $5100,Y ; 20 + sta $5500,Y ; 21 + sta $5900,Y ; 22 + sta $5D00,Y ; 23 + + sta 
$4180,Y ; 24 + sta $4580,Y ; 25 + sta $4980,Y ; 26 + sta $4D80,Y ; 27 + sta $5180,Y ; 28 + sta $5580,Y ; 29 + sta $5980,Y ; 30 + sta $5D80,Y ; 31 +.endif + sta $4200,Y ; 32 + sta $4600,Y ; 33 + sta $4A00,Y ; 34 + sta $4E00,Y ; 35 + sta $5200,Y ; 36 + sta $5600,Y ; 37 + sta $5A00,Y ; 38 + sta $5E00,Y ; 39 + + sta $4280,Y ; 40 + sta $4680,Y ; 41 + sta $4A80,Y ; 42 + sta $4E80,Y ; 43 + sta $5280,Y ; 44 + sta $5680,Y ; 45 + sta $5A80,Y ; 46 + sta $5E80,Y ; 47 + + sta $4300,Y ; 48 + sta $4700,Y ; 49 + sta $4B00,Y ; 50 + sta $4F00,Y ; 51 + sta $5300,Y ; 52 + sta $5700,Y ; 53 + sta $5B00,Y ; 54 + sta $5F00,Y ; 55 + + sta $4380,Y ; 56 + sta $4780,Y ; 57 + sta $4B80,Y ; 58 + sta $4F80,Y ; 59 + sta $5380,Y ; 60 + sta $5780,Y ; 61 + sta $5B80,Y ; 62 + sta $5F80,Y ; 63 + + ;======= + + sta $4028,Y ; 64 + sta $4428,Y ; 1 + sta $4828,Y ; 2 + sta $4C28,Y ; 3 + sta $5028,Y ; 4 + sta $5428,Y ; 5 + sta $5828,Y ; 6 + sta $5C28,Y ; 7 + + sta $40A8,Y ; 72 + sta $44A8,Y ; 9 + sta $48A8,Y ; 10 + sta $4CA8,Y ; 11 + sta $50A8,Y ; 12 + sta $54A8,Y ; 13 + sta $58A8,Y ; 14 + sta $5CA8,Y ; 15 + + sta $4128,Y ; 80 + sta $4528,Y ; 17 + sta $4928,Y ; 18 + sta $4D28,Y ; 19 + sta $5128,Y ; 20 + sta $5528,Y ; 21 + sta $5928,Y ; 22 + sta $5D28,Y ; 23 + + sta $41A8,Y ; 88 + sta $45A8,Y ; 25 + sta $49A8,Y ; 26 + sta $4DA8,Y ; 27 + sta $51A8,Y ; 28 + sta $55A8,Y ; 29 + sta $59A8,Y ; 30 + sta $5DA8,Y ; 31 + + sta $4228,Y ; 96 + sta $4628,Y ; 33 + sta $4A28,Y ; 34 + sta $4E28,Y ; 35 + sta $5228,Y ; 36 + sta $5628,Y ; 37 + sta $5A28,Y ; 38 + sta $5E28,Y ; 39 + + sta $42A8,Y ; 104 + sta $46A8,Y ; 41 + sta $4AA8,Y ; 42 + sta $4EA8,Y ; 43 + sta $52A8,Y ; 44 + sta $56A8,Y ; 45 + sta $5AA8,Y ; 46 + sta $5EA8,Y ; 47 + + sta $4328,Y ; 112 + sta $4728,Y ; 49 + sta $4B28,Y ; 50 + sta $4F28,Y ; 51 + sta $5328,Y ; 52 + sta $5728,Y ; 53 + sta $5B28,Y ; 54 + sta $5F28,Y ; 55 + + sta $43A8,Y ; 120 + sta $47A8,Y ; 57 + sta $4BA8,Y ; 58 + sta $4FA8,Y ; 59 + sta $53A8,Y ; 60 + sta $57A8,Y ; 61 + sta $5BA8,Y ; 62 + sta 
$5FA8,Y ; 63 + + ;========= + + sta $4050,Y ; 128 + sta $4450,Y ; 1 + sta $4850,Y ; 2 + sta $4C50,Y ; 3 + sta $5050,Y ; 4 + sta $5450,Y ; 5 + sta $5850,Y ; 6 + sta $5C50,Y ; 7 + + sta $40D0,Y ; 136 + sta $44D0,Y ; 9 + sta $48D0,Y ; 10 + sta $4CD0,Y ; 11 + sta $50D0,Y ; 12 + sta $54D0,Y ; 13 + sta $58D0,Y ; 14 + sta $5CD0,Y ; 15 + + sta $4150,Y ; 144 + sta $4550,Y ; 17 + sta $4950,Y ; 18 + sta $4D50,Y ; 19 + sta $5150,Y ; 20 + sta $5550,Y ; 21 + sta $5950,Y ; 22 + sta $5D50,Y ; 23 + + sta $41D0,Y ; 152 + sta $45D0,Y ; 25 + sta $49D0,Y ; 26 + sta $4DD0,Y ; 27 + sta $51D0,Y ; 28 + sta $55D0,Y ; 29 + sta $59D0,Y ; 30 + sta $5DD0,Y ; 31 +.if 0 + sta $4250,Y ; 160 + sta $4650,Y ; 33 + sta $4A50,Y ; 34 + sta $4E50,Y ; 35 + sta $5250,Y ; 36 + sta $5650,Y ; 37 + sta $5A50,Y ; 38 + sta $5E50,Y ; 39 + + sta $42D0,Y ; 168 + sta $46D0,Y ; 41 + sta $4AD0,Y ; 42 + sta $4ED0,Y ; 43 + sta $52D0,Y ; 44 + sta $56D0,Y ; 45 + sta $5AD0,Y ; 46 + sta $5ED0,Y ; 47 + + sta $4350,Y ; 176 + sta $4750,Y ; 49 + sta $4B50,Y ; 50 + sta $4F50,Y ; 51 + sta $5350,Y ; 52 + sta $5750,Y ; 53 + sta $5B50,Y ; 54 + sta $5F50,Y ; 55 + + sta $43D0,Y ; 184 + sta $47D0,Y ; 57 + sta $4BD0,Y ; 58 + sta $4FD0,Y ; 59 + sta $53D0,Y ; 60 + sta $57D0,Y ; 61 + sta $5BD0,Y ; 62 + sta $5FD0,Y ; 63 +.endif + iny + cpy #30 + beq done_hgr_page2_cls + jmp hgr_page2_cls_loop +done_hgr_page2_cls: + rts + diff --git a/graphics/hgr/bubble/sin_unrolled.s b/graphics/hgr/bubble/sin_unrolled.s new file mode 100644 index 00000000..267a3baf --- /dev/null +++ b/graphics/hgr/bubble/sin_unrolled.s @@ -0,0 +1,57 @@ + + ;======================= +;sin: + + ; / 6.28 is roughly the same as *0.16 + ; = .5 .25 .125 .0625 .03125 + ; 1/6.28 = 0.16 = 0 0 1 0 1 0 0 0 = 0x28 + + ; i=(i*0x28)>>8; + +; lda IVL,Y ; note, uses absolute as no ZP equiv ; 4 +; sta STEMP1L ; 3 +; lda IVH,Y ; 4 + + ; A has STEMP1H + + ; i2=i<<3; + + + asl STEMP1L ; 5 + rol ; 2 + asl STEMP1L ; 5 + rol ; 2 + asl STEMP1L ; 5 + rol ; 2 + + ; i1=i<<5; + + ldx STEMP1L ; 3 + stx 
STEMP2L ; 3 + + sta STEMP1H ; 3 + + asl STEMP2L ; 5 + rol ; 2 + asl STEMP2L ; 5 + rol ; 2 + + ; i=(i1+i2)>>8; + + ; We ignore the low byte as we don't need it + ; carry is not cleared: it holds the bit the last rol shifted out, so the index can come out one too high + + adc STEMP1H ; 3 + tax ; 2 + + ; sl=fsinh[i]; + + ; tradeoff size for speed by having lookup + ; table for sign bits + ; the sign lookup only saves like 2 cycles + +; lda sin_table_low,X ; 4+ +; sta OUT1L,Y ; 5 +; lda sin_table_high,X ; 4+ +; sta OUT1H,Y ; 5 +