bubble: optimizing

This commit is contained in:
Vince Weaver 2023-12-31 14:02:56 -05:00
parent 988d2cebe5
commit 6c134ee061
5 changed files with 699 additions and 60 deletions

View File

@ -7,11 +7,13 @@ EMPTY_DISK = ../../../empty_disk
all: bubble.dsk all: bubble.dsk
bubble.dsk: HELLO BUBBLE.BAS BUBBLE bubble.dsk: HELLO BUBBLE.BAS BUBBLE BUBBLE_ORIG DIAMOND
cp $(EMPTY_DISK)/empty.dsk bubble.dsk cp $(EMPTY_DISK)/empty.dsk bubble.dsk
$(DOS33) -y bubble.dsk SAVE A HELLO $(DOS33) -y bubble.dsk SAVE A HELLO
$(DOS33) -y bubble.dsk SAVE A BUBBLE.BAS $(DOS33) -y bubble.dsk SAVE A BUBBLE.BAS
$(DOS33) -y bubble.dsk BSAVE -a 0x0C00 BUBBLE $(DOS33) -y bubble.dsk BSAVE -a 0x0C00 BUBBLE
$(DOS33) -y bubble.dsk BSAVE -a 0x0C00 BUBBLE_ORIG
$(DOS33) -y bubble.dsk BSAVE -a 0x0C00 DIAMOND
### ###
@ -28,6 +30,24 @@ bubble.o: bubble.s
### ###
BUBBLE_ORIG: bubble_orig.o
ld65 -o BUBBLE_ORIG bubble_orig.o -C $(LINKER_SCRIPTS)/apple2_c00.inc
bubble_orig.o: bubble_orig.s
ca65 -o bubble_orig.o bubble_orig.s -l bubble_orig.lst
###
DIAMOND: diamond.o
ld65 -o DIAMOND diamond.o -C $(LINKER_SCRIPTS)/apple2_c00.inc
diamond.o: diamond.s
ca65 -o diamond.o diamond.s -l diamond.lst
###
BUBBLE.BAS: bubble.bas BUBBLE.BAS: bubble.bas
$(TOKENIZE) < bubble.bas > BUBBLE.BAS $(TOKENIZE) < bubble.bas > BUBBLE.BAS
@ -35,4 +55,4 @@ BUBBLE.BAS: bubble.bas
### ###
clean: clean:
rm -f *~ *.o *.lst BUBBLE.BAS HELLO BUBBLE rm -f *~ *.o *.lst BUBBLE.BAS HELLO BUBBLE DIAMOND BUBBLE_ORIG

View File

@ -1,12 +1,25 @@
; bubble universe -- Apple II Hires ; bubble universe -- Apple II Hires
; original = 612 bytes
; clear screen:
; bkgnd0 = $44198 = 278936 cycles = max ~4fps
; new: $A616 = 42518 = max ~22fps
; hplot
; hplot0 = ($14E-$15C) $14E = 334 * 1024 = 342016 = max ~3fps
; lookup = 46 * 1024 = 47104 = max ~21fps
; after fast graphics
; D7E77 = 884343 = 1.1fps
; DD06E = ?? (made J countdown, why longer? J starts at NUM and the loop only exits on bmi, so the countdown runs NUM+1 passes instead of NUM)
; DB584 = destructive U when plotting
; soft-switches ; soft-switches
KEYPRESS = $C000 KEYPRESS = $C000
KEYRESET = $C010 KEYRESET = $C010
PAGE1 = $C054 PAGE1 = $C054
PAGE2 = $C055 PAGE2 = $C055
; ROM routines ; ROM routines
@ -21,6 +34,10 @@ WAIT = $FCA8 ; delay 1/2(26+27A+5A^2) us
; zero page ; zero page
GBASL = $26
GBASH = $27
HPLOTXL = $90 HPLOTXL = $90
HPLOTXH = $91 HPLOTXH = $91
HPLOTYL = $92 HPLOTYL = $92
@ -57,10 +74,18 @@ NUM = 32
bubble: bubble:
;========================
; setup lookup tables
jsr hgr_make_tables
;=======================
; init graphics
jsr HGR2 jsr HGR2
ldx #7 ;=======================
jsr HCOLOR1 ; init variables
lda #0 lda #0
sta XL sta XL
@ -70,45 +95,64 @@ bubble:
sta TL sta TL
sta TH sta TH
;=========================
;=========================
; main loop
;=========================
;=========================
next_frame: next_frame:
lda #0 ;===========================
jsr BKGND0 ; "fast" clear screen
; inline to save 12 cycles
main_loop: jsr hgr_clear_screen
; clear screen: TODO ldx #0 ; 2
stx I ; 3
ldx #0
stx I
outer_loop: outer_loop:
ldx #0 ; setup R*I to inner loop
stx J ; save NUM*4 (128) cycles at expense of 11 cycles
ldx I ; 3
lda rl,X ; 4
sta rl_smc+1 ; 4
ldx #NUM ; 2
stx J ; 3
inner_loop: inner_loop:
; fixed_add(rh[i],rl[i],xh,xl,&rxh,&rxl); ; fixed_add(rh[i],rl[i],xh,xl,&rxh,&rxl);
ldx I ; 3 ; note: rh is always 0
; pre-calc (R*I)+X for later use
clc ; 2 clc ; 2
lda rl,X rl_smc:
lda #0 ; R*I ; 2
adc XL ; 3 adc XL ; 3
sta RXL ; 3 sta RXL ; 3
lda rh,X lda #0 ; 2
adc XH ; 3 adc XH ; 3
sta RXH ; 3 sta RXH ; 3
; fixed_add(i,0,vh,vl,&ivh,&ivl); ; fixed_add(i,0,vh,vl,&ivh,&ivl);
clc
lda #0 ; precalc I+V for later use
adc VL ; this is 8.8 fixed point so bottom byte of I is 0
sta IVL
lda I ; clc ; C should be 0 from prev ;
adc VH lda VL ; 3
sta IVH sta IVL ; 3
lda I ; 3
adc VH ; 3
sta IVH ; 3
; U=SIN(I+V)+SIN(RR+X) ; U=SIN(I+V)+SIN(RR+X)
; float_to_fixed(sin(ivh,ivl) + sin(rxh,rxl), &uh,&ul);
ldy #0 ldy #0
jsr sin jsr sin
@ -124,7 +168,6 @@ inner_loop:
sta UH sta UH
; V=COS(I+V)+COS(RR+X) ; V=COS(I+V)+COS(RR+X)
; float_to_fixed(cos(ivh,ivl) + cos(rxh,rxl), &vh,&vl);
ldy #0 ldy #0
jsr cos jsr cos
@ -141,37 +184,52 @@ inner_loop:
; X=U+T ; X=U+T
; fixed_add(uh,ul,th,tl,&xh,&xl); clc ; 2
clc lda UL ; 3
lda UL adc TL ; 3
adc TL sta XL ; 3
sta XL lda UH ; 3
lda UH adc TH ; 3
adc TH sta XH ; 3
sta XH
; HPLOT 32*U+140,32*V+96 ; HPLOT 32*U+140,32*V+96
; hplot(48*fixed_to_float(uh,ul)+140,
; 48*fixed_to_float(vh,vl)+96);
; HPLOT0 plot at (Y,X), (A) ; U can be destroyed as we don't use it again?
lda UL ; 01234567 89ABCDEF
sta HPLOTYL
; 56789ABC DEF00000
; we want 56789ABC, rotate right by 3 is two iterations faster?
; lda UL
; sta HPLOTYL
lda UL ; 3
lsr UH ; 5
ror ; 2
lsr UH ; 5
ror ; 2
lsr UH ; 5
ror ; 2
.if 0
lda UH lda UH
asl HPLOTYL asl UL
rol rol
asl HPLOTYL asl UL
rol rol
asl HPLOTYL asl UL
rol rol
asl HPLOTYL asl UL
rol rol
asl HPLOTYL asl UL
rol rol
.endif
clc clc
adc #140 adc #140
tax tax
@ -195,17 +253,31 @@ inner_loop:
clc clc
adc #96 adc #96
; "fast" hplot, Xpos in X, Ypos in A
ldy #0 ; never bigger than 140+48 = 188 tay ; 2
; ldx #140 lda hposn_low,Y ; 4
; lda #96 sta GBASL ; 3
jsr HPLOT0 clc ; 2
lda hposn_high,Y ; 4
adc HGR_PAGE ; 3
sta GBASH ; 3
; 21
inc J ldy div7_table,X ; 4
lda J
cmp #NUM lda mod7_table,X ; 4
beq done_j tax ; 2
jmp inner_loop ; 31
lda (GBASL),Y ; 5
ora log_lookup,X ; 4
sta (GBASL),Y ; 6
; 46
dec J ; 5
bmi done_j ; 2/3
jmp inner_loop ; 3
; bpl inner_loop
done_j: done_j:
inc I inc I
@ -329,12 +401,6 @@ cos:
jmp already_loaded jmp already_loaded
rh:
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
rl: rl:
.byte $00,$06,$0C,$12,$19,$1F,$25,$2B .byte $00,$06,$0C,$12,$19,$1F,$25,$2B
.byte $32,$38,$3E,$45,$4B,$51,$57,$5E .byte $32,$38,$3E,$45,$4B,$51,$57,$5E
@ -359,3 +425,8 @@ sin_lookup:
.byte $A6,$A8,$AA,$AD,$AF,$B1,$B4,$B6,$B9,$BC,$BE,$C1,$C4,$C7,$C9,$CC .byte $A6,$A8,$AA,$AD,$AF,$B1,$B4,$B6,$B9,$BC,$BE,$C1,$C4,$C7,$C9,$CC
.byte $CF,$D2,$D5,$D8,$DB,$DE,$E1,$E4,$E7,$EA,$ED,$F0,$F4,$F7,$FA,$FD .byte $CF,$D2,$D5,$D8,$DB,$DE,$E1,$E4,$E7,$EA,$ED,$F0,$F4,$F7,$FA,$FD
log_lookup:
.byte $81,$82,$84,$88,$90,$A0,$C0,$80
.include "hgr_clear_screen.s"
.include "hgr_table.s"

View File

@ -0,0 +1,360 @@
; bubble universe -- Apple II Hires
; soft-switches
KEYPRESS = $C000
KEYRESET = $C010
PAGE1 = $C054	; touched with BIT in the page-flip code to display page 1
PAGE2 = $C055	; touched with BIT in the page-flip code to display page 2
; ROM routines
BKGND0 = $F3F4 ; clear current page to A
HGR2 = $F3D8 ; set hires page2 and clear $4000-$5fff
HGR = $F3E2 ; set hires page1 and clear $2000-$3fff
HPLOT0 = $F457 ; plot at (Y,X), (A)
HLINRL = $F530 ; line to (X,A), (Y)
HCOLOR1 = $F6F0 ; set HGR_COLOR to value in X
COLORTBL = $F6F6
WAIT = $FCA8 ; delay 1/2(26+27A+5A^2) us
; zero page
;
; All L/H pairs below are 16-bit values, low byte first; the code treats
; them as 8.8 fixed point (high byte = integer part, low byte = fraction).
HPLOTXL = $90
HPLOTXH = $91
HPLOTYL = $92	; used as a scratch low byte while scaling U and V for the plot
HPLOTYH = $93
; IVL..RXH must stay contiguous and in this order: sin/cos read their
; argument with "lda IVL,Y", Y=0 selecting IV and Y=2 selecting RX.
IVL = $94	; I+V, first sin/cos argument
IVH = $95
RXL = $96	; (R*I)+X, second sin/cos argument
RXH = $97
; OUT1L..OUT2H must also stay contiguous: sin/cos store their result
; with "sta OUT1L,Y", Y=0 selecting OUT1 and Y=2 selecting OUT2.
OUT1L = $98
OUT1H = $99
OUT2L = $9A
OUT2H = $9B
; scratch for the multiply inside sin/cos
STEMP1L = $9C
STEMP1H = $9D
STEMP2L = $9E
STEMP2H = $9F
I = $D0		; outer loop counter
J = $D1		; inner loop counter
XL = $D4	; X = U+T
XH = $D5
VL = $D6	; V = cos(I+V)+cos(R*I+X)
VH = $D7
TL = $DA	; T, advanced by 1/32 every frame
TH = $DB
UL = $DC	; U = sin(I+V)+sin(R*I+X)
UH = $DD
HGR_PAGE = $E6	; page being drawn to: $20 = page 1, $40 = page 2
; const
NUM = 32	; bound of both the I and J loops (NUM*NUM points per frame)
;=====================================================================
; bubble -- program entry point and main animation loop (never returns)
;
; Per frame: clear the page being drawn, plot NUM*NUM points with the
; ROM HPLOT0 routine, advance T by 1/32, then swap the drawn/displayed
; hi-res pages.
;
; For every (I,J) pass:
;	U = sin(I+V) + sin(R*I+X)
;	V = cos(I+V) + cos(R*I+X)
;	X = U + T
;	plot at (32*U+140, 32*V+96)
; All values are 16-bit 8.8 fixed point held in zero page.
;=====================================================================
bubble:
jsr HGR2		; hi-res page 2, cleared
ldx #7
jsr HCOLOR1		; HGR_COLOR = color index 7
; zero the state that carries over between points and frames
lda #0
sta XL
sta XH
sta VL
sta VH
sta TL
sta TH
next_frame:
lda #0
jsr BKGND0		; clear the current draw page to 0
main_loop:		; (label is not branched to in this file's visible code)
; clear screen: TODO
ldx #0
stx I
outer_loop:
ldx #0
stx J
inner_loop:
; fixed_add(rh[i],rl[i],xh,xl,&rxh,&rxl);
; RX = R*I + X, with R*I looked up from the rl/rh tables
ldx I ; 3
clc ; 2
lda rl,X
adc XL ; 3
sta RXL ; 3
lda rh,X
adc XH ; 3
sta RXH ; 3
; fixed_add(i,0,vh,vl,&ivh,&ivl);
; IV = I + V; I is a whole number, so its low (fraction) byte is 0
clc
lda #0
adc VL
sta IVL
lda I
adc VH
sta IVH
; U=SIN(I+V)+SIN(RR+X)
; float_to_fixed(sin(ivh,ivl) + sin(rxh,rxl), &uh,&ul);
ldy #0			; Y=0: argument IV, result in OUT1
jsr sin
ldy #2			; Y=2: argument RX, result in OUT2
jsr sin
clc
lda OUT1L
adc OUT2L
sta UL
lda OUT1H
adc OUT2H
sta UH
; V=COS(I+V)+COS(RR+X)
; float_to_fixed(cos(ivh,ivl) + cos(rxh,rxl), &vh,&vl);
; IV and RX are still intact here: sin only wrote OUT1/OUT2 and STEMP*
ldy #0
jsr cos
ldy #2
jsr cos
clc
lda OUT1L
adc OUT2L
sta VL
lda OUT1H
adc OUT2H
sta VH
; X=U+T
; fixed_add(uh,ul,th,tl,&xh,&xl);
clc
lda UL
adc TL
sta XL
lda UH
adc TH
sta XH
; HPLOT 32*U+140,32*V+96
; hplot(48*fixed_to_float(uh,ul)+140,
; 48*fixed_to_float(vh,vl)+96);
; NOTE: the code below scales by 32 (five left shifts), not the 48
; shown in the reference line above.
; HPLOT0 plot at (Y,X), (A)
; horizontal = 32*U + 140: shift the 16-bit U left 5 times and keep
; only the high byte (in A); HPLOTYL is scratch for the low byte
lda UL
sta HPLOTYL
lda UH
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
clc
adc #140
tax			; X = horizontal low byte for HPLOT0
; vertical = 32*V + 96, same scaling, result left in A for HPLOT0
lda VL
sta HPLOTYL
lda VH
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
clc
adc #96
ldy #0 ; horizontal high byte: |U| < 2 so 140+32*U stays within 76..204
; ldx #140
; lda #96
jsr HPLOT0
; J counts 0..NUM-1; jmp is used for the back edge (loop body is long)
inc J
lda J
cmp #NUM
beq done_j
jmp inner_loop
done_j:
inc I
lda I
cmp #NUM
beq done_i
jmp outer_loop
done_i:
; t=t+(1.0/32.0);
; 1/2 1/4 1/8 1/16 | 1/32 1/64 1/128 1/256
; $0x08
; in 8.8 fixed point 1/32 is $0008: add 8 to TL, carry into TH
clc
lda TL
adc #$8
sta TL
lda #0
adc TH
sta TH
end:
; flip pages
; if $20 (draw PAGE1) draw PAGE2, SHOW page1
; if $40 (draw PAGE2) draw PAGE1, SHOW page2
lda HGR_PAGE
eor #$60		; toggles $20 <-> $40
sta HGR_PAGE
cmp #$40
bne flip2		; new draw page is $20 -> display page 2
flip1:
bit PAGE1		; new draw page is $40 -> display page 1
jmp next_frame
flip2:
bit PAGE2
jmp next_frame
;=======================
; sin -- table-driven sine of an 8.8 fixed-point angle
;
; In:	Y = 0 -> argument in IVL/IVH, result to OUT1L/OUT1H
;	Y = 2 -> argument in RXL/RXH, result to OUT2L/OUT2H
;	(both are reached by indexing IVL / OUT1L with Y)
; Out:	16-bit signed result = 2 * sin_lookup[index], sign-extended
; Uses:	A, X, STEMP1L/H, STEMP2L/H.  Y is not modified.
;
; The table index is the high byte of (angle * $28); $28/256 stands in
; for 1/(2*pi).  The multiply is done as (angle<<3)+(angle<<5) in
; 16 bits; bits shifted out of the top are simply dropped.
;=======================
sin:
; / 6.28 is roughly the same as *0.16
; = .5 .25 .125 .0625 .03125
; 1/6.28 = 0.16 = 0 0 1 0 1 0 0 0 = 0x28
; i=(i*0x28)>>8;
lda IVL,Y
sta STEMP1L
lda IVH,Y
sta STEMP1H
; cos enters here with its phase-shifted angle already in STEMP1L/H
already_loaded:
; i2=i<<3;
asl STEMP1L
rol STEMP1H
asl STEMP1L
rol STEMP1H
asl STEMP1L
rol STEMP1H
; i1=i<<5;
; (copy i<<3 and shift it two more places)
lda STEMP1L
sta STEMP2L
lda STEMP1H
sta STEMP2H
asl STEMP2L
rol STEMP2H
asl STEMP2L
rol STEMP2H
; i=(i1+i2)>>8;
; (the >>8 is done by using only the high byte of the sum)
clc
lda STEMP1L
adc STEMP2L
sta STEMP1L
lda STEMP1H
adc STEMP2H
sta STEMP1H
ldx STEMP1H
; sl=fsinh[i];
lda sin_lookup,X
asl			; double the entry; its sign bit falls into carry
sta OUT1L,Y
bcs sin_negative
sin_positive:
lda #$0
beq set_sin_sign	; always taken (A is 0)
sin_negative:
lda #$FF
set_sin_sign:
sta OUT1H,Y		; high byte = sign extension ($00 or $FF)
rts
;=============================
; cos -- cosine computed as sin(angle + 1.57)
;
; Same Y convention, outputs and register use as sin: adds $0192 to
; the selected argument into STEMP1L/H (the argument itself is left
; unchanged) and finishes inside sin at already_loaded.
;=============================
cos:
; 1.57 is roughly 0x0192 in 8.8
clc
lda IVL,Y
adc #$92
sta STEMP1L
lda IVH,Y
adc #1			; plus the carry from the low-byte add
sta STEMP1H
jmp already_loaded
; rh/rl: high and low bytes of the per-I angle term (R*I in the main
; loop), indexed by I = 0..31.  Every high byte is 0.
rh:
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
; low bytes: entries rise by about 6.28 per step, so as an 8.8 value
; rl[i] is roughly i * (6.28/256)
rl:
.byte $00,$06,$0C,$12,$19,$1F,$25,$2B
.byte $32,$38,$3E,$45,$4B,$51,$57,$5E
.byte $64,$6A,$71,$77,$7D,$83,$8A,$90
.byte $96,$9D,$A3,$A9,$AF,$B6,$BC,$C2
; sin_lookup: 256 signed bytes covering one full sine period
; (16 entries per row).  Peak values are $7F and $81; the sin routine
; doubles each entry (asl) and sign-extends it to 16 bits.
sin_lookup:
.byte $00,$03,$06,$09,$0C,$0F,$12,$15,$18,$1C,$1F,$22,$25,$28,$2B,$2E
.byte $30,$33,$36,$39,$3C,$3F,$41,$44,$47,$49,$4C,$4E,$51,$53,$55,$58
.byte $5A,$5C,$5E,$60,$62,$64,$66,$68,$6A,$6C,$6D,$6F,$70,$72,$73,$74
.byte $76,$77,$78,$79,$7A,$7B,$7C,$7C,$7D,$7E,$7E,$7F,$7F,$7F,$7F,$7F
.byte $7F,$7F,$7F,$7F,$7F,$7F,$7E,$7E,$7D,$7C,$7C,$7B,$7A,$79,$78,$77
.byte $76,$75,$73,$72,$70,$6F,$6D,$6C,$6A,$68,$66,$64,$63,$61,$5E,$5C
.byte $5A,$58,$56,$53,$51,$4E,$4C,$49,$47,$44,$41,$3F,$3C,$39,$36,$34
.byte $31,$2E,$2B,$28,$25,$22,$1F,$1C,$19,$16,$12,$0F,$0C,$09,$06,$03
; second half of the period: negative values
.byte $00,$FE,$FA,$F7,$F4,$F1,$EE,$EB,$E8,$E5,$E2,$DF,$DC,$D9,$D6,$D3
.byte $D0,$CD,$CA,$C7,$C4,$C2,$BF,$BC,$BA,$B7,$B4,$B2,$AF,$AD,$AB,$A8
.byte $A6,$A4,$A2,$A0,$9E,$9C,$9A,$98,$96,$95,$93,$91,$90,$8E,$8D,$8C
.byte $8A,$89,$88,$87,$86,$85,$84,$84,$83,$82,$82,$81,$81,$81,$81,$81
.byte $81,$81,$81,$81,$81,$81,$82,$82,$83,$84,$84,$85,$86,$87,$88,$89
.byte $8A,$8B,$8D,$8E,$8F,$91,$93,$94,$96,$98,$99,$9B,$9D,$9F,$A1,$A4
.byte $A6,$A8,$AA,$AD,$AF,$B1,$B4,$B6,$B9,$BC,$BE,$C1,$C4,$C7,$C9,$CC
.byte $CF,$D2,$D5,$D8,$DB,$DE,$E1,$E4,$E7,$EA,$ED,$F0,$F4,$F7,$FA,$FD

View File

@ -0,0 +1,91 @@
;===========================================================
; hgr_clear_screen
;	Fill one 8K hi-res page with 0.
;
;	HGR_PAGE == $40 : clears $4000-$5FFF (page 2)
;	anything else   : clears $2000-$3FFF (page 1)
;
;	Out:	A = 0, Y = 0.  X is not touched.
;
;	Each pass of a loop stores to the same offset in all 32
;	256-byte rows of the page; the .repeat blocks expand to
;	those 32 "sta abs,Y" instructions.
;===========================================================
hgr_clear_screen:
	lda	#0			; color
	ldy	HGR_PAGE
	cpy	#$40
	beq	hgr_page2_clearscreen

	;---------------------------
	; page 1: $2000 - $3FFF
	;---------------------------
hgr_page1_clearscreen:
	ldy	#0
hgr_page1_cls_loop:
.repeat 32, cls_row
	sta	$2000+(cls_row*$100),Y
.endrepeat
	iny
	bne	hgr_page1_cls_loop	; until Y wraps back to 0
	rts

	;---------------------------
	; page 2: $4000 - $5FFF
	;---------------------------
hgr_page2_clearscreen:
	ldy	#0
hgr_page2_cls_loop:
.repeat 32, cls_row
	sta	$4000+(cls_row*$100),Y
.endrepeat
	iny
	bne	hgr_page2_cls_loop	; until Y wraps back to 0
	rts

View File

@ -0,0 +1,97 @@
;div7_table = $b800
;mod7_table = $b900
;hposn_high = $ba00
;hposn_low = $bb00
;=============================================================
; hgr_make_tables
;	Build the lookup tables used for plotting:
;		div7_table[n] = n / 7	(n = 0..255)
;		mod7_table[n] = n % 7	(n = 0..255)
;	then fall through into build_hposn_tables.
;
;	This part uses A, X, Y and leaves Y = 0.
;=============================================================
hgr_make_tables:

hgr_make_7_tables:

	;---------------------------------
	; quotient table
	;---------------------------------
	ldx	#7			; X = entries left before quotient bumps
	ldy	#0			; Y = table index
	tya				; A = current quotient, starts at 0
div7_loop:
	sta	div7_table,Y
	dex
	bne	div7_not7		; still inside the same group of 7
	ldx	#7			; start next group
	clc
	adc	#1			; quotient++
div7_not7:
	iny
	bne	div7_loop		; until index wraps after 256 entries

	;---------------------------------
	; remainder table
	;---------------------------------
	ldy	#0			; Y = table index
	ldx	#0			; X = current remainder, 0..6
mod7_loop:
	txa
	sta	mod7_table,Y
	inx
	cpx	#7
	bne	mod7_not7
	ldx	#0			; remainder wraps back to 0
mod7_not7:
	iny
	bne	mod7_loop		; until index wraps after 256 entries
; Hposn table
; hposn_low, hposn_high will each be filled with $C0 bytes
; based on routine by John Brooks
; posted on comp.sys.apple2 on 2018-07-11
; https://groups.google.com/d/msg/comp.sys.apple2/v2HOfHOmeNQ/zD76fJg_BAAJ
; clobbers A,X
; preserves Y
; vmw note: version I was using based on applesoft HPOSN was ~64 bytes
; this one is 37 bytes
;
; For each row X = 0..$BF this stores the low and high byte of that
; row's hi-res start address (e.g. row 0 -> $2000, row 1 -> $2400,
; row 8 -> $2080, row 64 -> $2028).
;
; The high bytes already include the $20 page-1 base (ora #$20 below).
; NOTE(review): the fast hplot in bubble.s does
; "lda hposn_high,Y / adc HGR_PAGE" with HGR_PAGE = $20/$40, which
; would yield $40xx/$60xx rather than $20xx/$40xx -- confirm which side
; is meant to carry the page base.
;
; The shifts and rotates pass bits to each other through the carry
; flag (txa/and leave carry alone), so the instruction order matters.
build_hposn_tables:
ldx #0
btmi:
txa
and #$F8		; row with its low 3 bits cleared
bpl btpl1
ora #5
btpl1:
asl
bpl btpl2
ora #5
btpl2:
asl
asl
sta hposn_low, X	; partial low byte; shifted once more below
txa
and #7			; low 3 bits of the row go into the high byte
rol			; takes the carry left by the last asl above
asl hposn_low, X	; final low byte; its top bit goes to carry
rol
ora #$20		; page 1 base
sta hposn_high, X
inx
cpx #$C0		; 192 rows
bne btmi
; go 16 beyond, which allows our text scrolling routine
; X runs 16 down to 0, so 17 entries (rows 0..16) are copied to
; indices 192..208.  The copied high bytes are flipped with eor #$60,
; which turns a $2x/$3x value into $4x/$5x.
ldx #16
extra_table_loop:
lda hposn_low,X
sta hposn_low+192,X
lda hposn_high,X
eor #$60
sta hposn_high+192,X
dex
bpl extra_table_loop
rts