bubble: optimizing

This commit is contained in:
Vince Weaver 2023-12-31 14:02:56 -05:00
parent 988d2cebe5
commit 6c134ee061
5 changed files with 699 additions and 60 deletions

View File

@ -7,11 +7,13 @@ EMPTY_DISK = ../../../empty_disk
all: bubble.dsk
bubble.dsk: HELLO BUBBLE.BAS BUBBLE
bubble.dsk: HELLO BUBBLE.BAS BUBBLE BUBBLE_ORIG DIAMOND
cp $(EMPTY_DISK)/empty.dsk bubble.dsk
$(DOS33) -y bubble.dsk SAVE A HELLO
$(DOS33) -y bubble.dsk SAVE A BUBBLE.BAS
$(DOS33) -y bubble.dsk BSAVE -a 0x0C00 BUBBLE
$(DOS33) -y bubble.dsk BSAVE -a 0x0C00 BUBBLE_ORIG
$(DOS33) -y bubble.dsk BSAVE -a 0x0C00 DIAMOND
###
@ -28,6 +30,24 @@ bubble.o: bubble.s
###
BUBBLE_ORIG: bubble_orig.o
ld65 -o BUBBLE_ORIG bubble_orig.o -C $(LINKER_SCRIPTS)/apple2_c00.inc
bubble_orig.o: bubble_orig.s
ca65 -o bubble_orig.o bubble_orig.s -l bubble_orig.lst
###
DIAMOND: diamond.o
ld65 -o DIAMOND diamond.o -C $(LINKER_SCRIPTS)/apple2_c00.inc
diamond.o: diamond.s
ca65 -o diamond.o diamond.s -l diamond.lst
###
BUBBLE.BAS: bubble.bas
$(TOKENIZE) < bubble.bas > BUBBLE.BAS
@ -35,4 +55,4 @@ BUBBLE.BAS: bubble.bas
###
clean:
rm -f *~ *.o *.lst BUBBLE.BAS HELLO BUBBLE
rm -f *~ *.o *.lst BUBBLE.BAS HELLO BUBBLE DIAMOND BUBBLE_ORIG

View File

@ -1,5 +1,18 @@
; bubble universe -- Apple II Hires
; original = 612 bytes
; clear screen:
; bkgnd0 = $44198 = 278936 cycles = max ~4fps
; new: $A616 = 42518 = max ~22fps
; hplot
; hplot0 = ($14E-$15C) $14E = 334 * 1024 = 342016 = max ~3fps
; lookup = 46 * 1024 = 47104 = max ~21fps
; after fast graphics
; D7E77 = 884343 = 1.1fps
; DD06E = ?? (made J countdown, why longer?)
; DB584 = destructive U when plotting
; soft-switches
@ -21,6 +34,10 @@ WAIT = $FCA8 ; delay 1/2(26+27A+5A^2) us
; zero page
GBASL = $26
GBASH = $27
HPLOTXL = $90
HPLOTXH = $91
HPLOTYL = $92
@ -57,10 +74,18 @@ NUM = 32
bubble:
;========================
; setup lookup tables
jsr hgr_make_tables
;=======================
; init graphics
jsr HGR2
ldx #7
jsr HCOLOR1
;=======================
; init variables
lda #0
sta XL
@ -70,45 +95,64 @@ bubble:
sta TL
sta TH
;=========================
;=========================
; main loop
;=========================
;=========================
next_frame:
lda #0
jsr BKGND0
;===========================
; "fast" clear screen
; inline to save 12 cycles
main_loop:
jsr hgr_clear_screen
; clear screen: TODO
ldx #0 ; 2
stx I ; 3
ldx #0
stx I
outer_loop:
ldx #0
stx J
; setup R*I to inner loop
; save NUM*4 (128) cycles at expense of 11 cycles
ldx I ; 3
lda rl,X ; 4
sta rl_smc+1 ; 4
ldx #NUM ; 2
stx J ; 3
inner_loop:
; fixed_add(rh[i],rl[i],xh,xl,&rxh,&rxl);
ldx I ; 3
; note: rh is always 0
; pre-calc (R*I)+X for later use
clc ; 2
lda rl,X
rl_smc:
lda #0 ; R*I ; 2
adc XL ; 3
sta RXL ; 3
lda rh,X
lda #0 ; 2
adc XH ; 3
sta RXH ; 3
; fixed_add(i,0,vh,vl,&ivh,&ivl);
clc
lda #0
adc VL
sta IVL
lda I
adc VH
sta IVH
; precalc I+V for later use
; this is 8.8 fixed point so bottom byte of I is 0
; clc ; C should be 0 from prev ;
lda VL ; 3
sta IVL ; 3
lda I ; 3
adc VH ; 3
sta IVH ; 3
; U=SIN(I+V)+SIN(RR+X)
; float_to_fixed(sin(ivh,ivl) + sin(rxh,rxl), &uh,&ul);
ldy #0
jsr sin
@ -124,7 +168,6 @@ inner_loop:
sta UH
; V=COS(I+V)+COS(RR+X)
; float_to_fixed(cos(ivh,ivl) + cos(rxh,rxl), &vh,&vl);
ldy #0
jsr cos
@ -141,37 +184,52 @@ inner_loop:
; X=U+T
; fixed_add(uh,ul,th,tl,&xh,&xl);
clc
lda UL
adc TL
sta XL
lda UH
adc TH
sta XH
clc ; 2
lda UL ; 3
adc TL ; 3
sta XL ; 3
lda UH ; 3
adc TH ; 3
sta XH ; 3
; HPLOT 32*U+140,32*V+96
; hplot(48*fixed_to_float(uh,ul)+140,
; 48*fixed_to_float(vh,vl)+96);
; HPLOT0 plot at (Y,X), (A)
; U can be destroyed as we don't use it again?
lda UL
sta HPLOTYL
; 01234567 89ABCDEF
; 56789ABC DEF00000
; we want 56789ABC, rotate right by 3 is two iterations faster?
; lda UL
; sta HPLOTYL
lda UL ; 3
lsr UH ; 5
ror ; 2
lsr UH ; 5
ror ; 2
lsr UH ; 5
ror ; 2
.if 0
lda UH
asl HPLOTYL
asl UL
rol
asl HPLOTYL
asl UL
rol
asl HPLOTYL
asl UL
rol
asl HPLOTYL
asl UL
rol
asl HPLOTYL
asl UL
rol
.endif
clc
adc #140
tax
@ -195,17 +253,31 @@ inner_loop:
clc
adc #96
; "fast" hplot, Xpos in X, Ypos in A
ldy #0 ; never bigger than 140+48 = 188
; ldx #140
; lda #96
jsr HPLOT0
tay ; 2
lda hposn_low,Y ; 4
sta GBASL ; 3
clc ; 2
lda hposn_high,Y ; 4
adc HGR_PAGE ; 3
sta GBASH ; 3
; 21
inc J
lda J
cmp #NUM
beq done_j
jmp inner_loop
ldy div7_table,X ; 4
lda mod7_table,X ; 4
tax ; 2
; 31
lda (GBASL),Y ; 5
ora log_lookup,X ; 4
sta (GBASL),Y ; 6
; 46
dec J ; 5
bmi done_j ; 2/3
jmp inner_loop ; 3
; bpl inner_loop
done_j:
inc I
@ -329,12 +401,6 @@ cos:
jmp already_loaded
rh:
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
rl:
.byte $00,$06,$0C,$12,$19,$1F,$25,$2B
.byte $32,$38,$3E,$45,$4B,$51,$57,$5E
@ -359,3 +425,8 @@ sin_lookup:
.byte $A6,$A8,$AA,$AD,$AF,$B1,$B4,$B6,$B9,$BC,$BE,$C1,$C4,$C7,$C9,$CC
.byte $CF,$D2,$D5,$D8,$DB,$DE,$E1,$E4,$E7,$EA,$ED,$F0,$F4,$F7,$FA,$FD
log_lookup:
.byte $81,$82,$84,$88,$90,$A0,$C0,$80
.include "hgr_clear_screen.s"
.include "hgr_table.s"

View File

@ -0,0 +1,360 @@
; bubble universe -- Apple II Hires
; soft-switches
; (KEYPRESS and KEYRESET are defined but not referenced by the code in this file)
KEYPRESS = $C000
KEYRESET = $C010
; PAGE1/PAGE2 are touched with BIT at the end of every frame to select
; which hires page is shown
PAGE1 = $C054
PAGE2 = $C055
; ROM routines
; (only HGR2, HCOLOR1, BKGND0 and HPLOT0 are actually called in this file)
BKGND0 = $F3F4 ; clear current page to A
HGR2 = $F3D8 ; set hires page2 and clear $4000-$5fff
HGR = $F3E2 ; set hires page1 and clear $2000-$3fff
HPLOT0 = $F457 ; plot at (Y,X), (A)
HLINRL = $F530 ; line to (X,A), (Y)
HCOLOR1 = $F6F0 ; set HGR_COLOR to value in X
COLORTBL = $F6F6
WAIT = $FCA8 ; delay 1/2(26+27A+5A^2) us
; zero page
; HPLOTYL is used as a scratch low byte while scaling the plot coordinates;
; HPLOTXL, HPLOTXH and HPLOTYH are not referenced in this file
HPLOTXL = $90
HPLOTXH = $91
HPLOTYL = $92
HPLOTYH = $93
; sin/cos read their 16-bit argument from IVL,Y / IVH,Y with Y=0 or Y=2,
; so the IV and RX pairs must stay adjacent and in this order
IVL = $94
IVH = $95
RXL = $96
RXH = $97
; sin/cos write their 16-bit result to OUT1L,Y / OUT1H,Y with Y=0 or Y=2,
; so the OUT1 and OUT2 pairs must stay adjacent and in this order
OUT1L = $98
OUT1H = $99
OUT2L = $9A
OUT2H = $9B
; 16-bit scratch values used inside sin/cos
STEMP1L = $9C
STEMP1H = $9D
STEMP2L = $9E
STEMP2H = $9F
; outer (I) and inner (J) loop counters, each counts 0..NUM-1
I = $D0
J = $D1
; 16-bit variables, low byte / high byte (low byte is the fraction:
; T is advanced by $08 in TL for a step of 1/32)
XL = $D4
XH = $D5
VL = $D6
VH = $D7
TL = $DA
TH = $DB
UL = $DC
UH = $DD
; page value toggled with EOR #$60 each frame ($20 <-> $40, see page flip code)
HGR_PAGE = $E6
; const
; number of iterations of both the I loop and the J loop
NUM = 32
;=====================================================================
; bubble -- program entry point and per-frame loop (never returns)
;
; Per frame: clear the drawing page with the ROM routine, then run
; NUM*NUM iterations of
;	U = sin(I+V) + sin(rl[I]+X)
;	V = cos(I+V) + cos(rl[I]+X)
;	X = U + T
; plotting one point per iteration at (32*U+140, 32*V+96) with the
; ROM HPLOT0 routine.  After the loops T is advanced by 1/32 and the
; displayed/drawn hires pages are swapped.
;
; All values are 16-bit, low byte = fraction (8.8 fixed point).
; This is the unoptimized reference version kept for comparison.
;=====================================================================
bubble:
; init graphics: ROM HGR2, then set the hires color to entry 7
jsr HGR2
ldx #7
jsr HCOLOR1
; X = V = T = 0
lda #0
sta XL
sta XH
sta VL
sta VH
sta TL
sta TH
next_frame:
; clear the current drawing page to 0 using the ROM routine
lda #0
jsr BKGND0
main_loop:
; clear screen: TODO
; I = 0
ldx #0
stx I
outer_loop:
; J = 0
ldx #0
stx J
inner_loop:
; RX = r[I] + X  (16-bit add, r[] taken from the rl/rh tables)
; fixed_add(rh[i],rl[i],xh,xl,&rxh,&rxl);
ldx I ; 3
clc ; 2
lda rl,X
adc XL ; 3
sta RXL ; 3
lda rh,X
adc XH ; 3
sta RXH ; 3
; IV = I + V  (I is a whole number, so its low/fraction byte is 0)
; fixed_add(i,0,vh,vl,&ivh,&ivl);
clc
lda #0
adc VL
sta IVL
lda I
adc VH
sta IVH
; U=SIN(I+V)+SIN(RR+X)
; float_to_fixed(sin(ivh,ivl) + sin(rxh,rxl), &uh,&ul);
; Y selects the argument/result pair: 0 -> IV in, OUT1 out
;                                     2 -> RX in, OUT2 out
ldy #0
jsr sin
ldy #2
jsr sin
; U = OUT1 + OUT2
clc
lda OUT1L
adc OUT2L
sta UL
lda OUT1H
adc OUT2H
sta UH
; V=COS(I+V)+COS(RR+X)
; float_to_fixed(cos(ivh,ivl) + cos(rxh,rxl), &vh,&vl);
; note IV was computed from the old V above, so V can be overwritten here
ldy #0
jsr cos
ldy #2
jsr cos
; V = OUT1 + OUT2
clc
lda OUT1L
adc OUT2L
sta VL
lda OUT1H
adc OUT2H
sta VH
; X=U+T
; fixed_add(uh,ul,th,tl,&xh,&xl);
clc
lda UL
adc TL
sta XL
lda UH
adc TH
sta XH
; HPLOT 32*U+140,32*V+96
; (the C reference below used a scale of 48; this code scales by 32)
; hplot(48*fixed_to_float(uh,ul)+140,
; 48*fixed_to_float(vh,vl)+96);
; HPLOT0 plot at (Y,X), (A)
; X coordinate: shift the 16-bit U left five times (x32) through
; HPLOTYL/A and keep the resulting high byte in A
lda UL
sta HPLOTYL
lda UH
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
; add the horizontal center; result goes to X for HPLOT0
clc
adc #140
tax
; Y coordinate: same x32 scaling applied to V
lda VL
sta HPLOTYL
lda VH
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
; add the vertical center; result stays in A for HPLOT0
clc
adc #96
; Y = high byte of the X coordinate, always 0 here.
; (188 below is the bound for the 48x reference; with the x32 scale and
;  |U| < 2 the bound is 140+64, which still fits in one byte)
ldy #0 ; never bigger than 140+48 = 188
; ldx #140
; lda #96
jsr HPLOT0
; next J; loop while J != NUM (JMP used as the loop body is out of branch range)
inc J
lda J
cmp #NUM
beq done_j
jmp inner_loop
done_j:
; next I; loop while I != NUM
inc I
lda I
cmp #NUM
beq done_i
jmp outer_loop
done_i:
; t=t+(1.0/32.0);
; 1/2 1/4 1/8 1/16 | 1/32 1/64 1/128 1/256
; 1/32 is bit 3 of the fraction byte = $08
clc
lda TL
adc #$8
sta TL
; propagate the carry into the high byte
lda #0
adc TH
sta TH
end:
; flip pages
; EOR #$60 toggles HGR_PAGE between $20 and $40
; if $20 (draw PAGE1) draw PAGE2, SHOW page1
; if $40 (draw PAGE2) draw PAGE1, SHOW page2
lda HGR_PAGE
eor #$60
sta HGR_PAGE
cmp #$40
bne flip2
flip1:
; now drawing to $40 (page 2): show page 1
bit PAGE1
jmp next_frame
flip2:
; now drawing to $20 (page 1): show page 2
bit PAGE2
jmp next_frame
;=======================
;---------------------------------------------------------------------
; sin -- table-driven sine of a 16-bit fixed point angle
;
; In:	Y = 0 or 2, selects the argument pair IVL,Y / IVH,Y
;	(0 -> IVL/IVH, 2 -> RXL/RXH)
; Out:	OUT1L,Y / OUT1H,Y = sign-extended 16-bit result
;	(0 -> OUT1L/OUT1H, 2 -> OUT2L/OUT2H)
; Uses:	A, X, flags, STEMP1L/H, STEMP2L/H.  Y is preserved.
;
; The label already_loaded is a second entry point used by cos, which
; jumps here with the (offset) angle already in STEMP1L/STEMP1H.
;---------------------------------------------------------------------
sin:
; / 6.28 is roughly the same as *0.16
; = .5 .25 .125 .0625 .03125
; 1/6.28 = 0.16 = 0 0 1 0 1 0 0 0 = 0x28
; i=(i*0x28)>>8;
; copy the argument into the scratch pair
lda IVL,Y
sta STEMP1L
lda IVH,Y
sta STEMP1H
already_loaded:
; multiply by $28 as (i<<3) + (i<<5), keeping only 16 bits
; i2=i<<3;
asl STEMP1L
rol STEMP1H
asl STEMP1L
rol STEMP1H
asl STEMP1L
rol STEMP1H
; i1=i<<5;  (copy i<<3 and shift it two more places)
lda STEMP1L
sta STEMP2L
lda STEMP1H
sta STEMP2H
asl STEMP2L
rol STEMP2H
asl STEMP2L
rol STEMP2H
; i=(i1+i2)>>8;  (the >>8 is done by using only the high byte below)
clc
lda STEMP1L
adc STEMP2L
sta STEMP1L
lda STEMP1H
adc STEMP2H
sta STEMP1H
; high byte of the product is the index into the 256-entry table
ldx STEMP1H
; sl=fsinh[i];
; table value is doubled to form the low result byte; the bit shifted
; out (the table entry's sign bit) lands in carry
lda sin_lookup,X
asl
sta OUT1L,Y
bcs sin_negative
sin_positive:
; carry clear: high byte $00 (BEQ is always taken after LDA #0)
lda #$0
beq set_sin_sign
sin_negative:
; carry set: high byte $FF (sign extension)
lda #$FF
set_sin_sign:
sta OUT1H,Y
rts
;=============================
;---------------------------------------------------------------------
; cos -- cosine computed as sin(angle + $0192)
;
; In:	Y = 0 or 2, selects the argument pair IVL,Y / IVH,Y
; Out:	OUT1L,Y / OUT1H,Y, written by the shared tail of sin
;
; Adds the constant to the argument, leaves the sum in
; STEMP1L/STEMP1H, and jumps into sin at already_loaded; the RTS
; there returns to cos's caller.
;---------------------------------------------------------------------
cos:
; 1.57 is roughly 0x0192 in 8.8
clc
lda IVL,Y
adc #$92
sta STEMP1L
lda IVH,Y
adc #1
sta STEMP1H
jmp already_loaded
; r[I] table, high bytes: NUM (32) entries indexed by I.  All zero.
rh:
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
; r[I] table, low bytes: NUM (32) entries indexed by I.
; Values step by 6 or 7 per entry (looks like I*6.28 truncated -- TODO confirm
; against the generator).
rl:
.byte $00,$06,$0C,$12,$19,$1F,$25,$2B
.byte $32,$38,$3E,$45,$4B,$51,$57,$5E
.byte $64,$6A,$71,$77,$7D,$83,$8A,$90
.byte $96,$9D,$A3,$A9,$AF,$B6,$BC,$C2
; 256-entry sine table, one full period, signed bytes ($7F max, $81 min).
; sin doubles each entry (ASL) and sign-extends it to 16 bits, so an
; entry of $7F becomes $00FE.
sin_lookup:
.byte $00,$03,$06,$09,$0C,$0F,$12,$15,$18,$1C,$1F,$22,$25,$28,$2B,$2E
.byte $30,$33,$36,$39,$3C,$3F,$41,$44,$47,$49,$4C,$4E,$51,$53,$55,$58
.byte $5A,$5C,$5E,$60,$62,$64,$66,$68,$6A,$6C,$6D,$6F,$70,$72,$73,$74
.byte $76,$77,$78,$79,$7A,$7B,$7C,$7C,$7D,$7E,$7E,$7F,$7F,$7F,$7F,$7F
.byte $7F,$7F,$7F,$7F,$7F,$7F,$7E,$7E,$7D,$7C,$7C,$7B,$7A,$79,$78,$77
.byte $76,$75,$73,$72,$70,$6F,$6D,$6C,$6A,$68,$66,$64,$63,$61,$5E,$5C
.byte $5A,$58,$56,$53,$51,$4E,$4C,$49,$47,$44,$41,$3F,$3C,$39,$36,$34
.byte $31,$2E,$2B,$28,$25,$22,$1F,$1C,$19,$16,$12,$0F,$0C,$09,$06,$03
.byte $00,$FE,$FA,$F7,$F4,$F1,$EE,$EB,$E8,$E5,$E2,$DF,$DC,$D9,$D6,$D3
.byte $D0,$CD,$CA,$C7,$C4,$C2,$BF,$BC,$BA,$B7,$B4,$B2,$AF,$AD,$AB,$A8
.byte $A6,$A4,$A2,$A0,$9E,$9C,$9A,$98,$96,$95,$93,$91,$90,$8E,$8D,$8C
.byte $8A,$89,$88,$87,$86,$85,$84,$84,$83,$82,$82,$81,$81,$81,$81,$81
.byte $81,$81,$81,$81,$81,$81,$82,$82,$83,$84,$84,$85,$86,$87,$88,$89
.byte $8A,$8B,$8D,$8E,$8F,$91,$93,$94,$96,$98,$99,$9B,$9D,$9F,$A1,$A4
.byte $A6,$A8,$AA,$AD,$AF,$B1,$B4,$B6,$B9,$BC,$BE,$C1,$C4,$C7,$C9,$CC
.byte $CF,$D2,$D5,$D8,$DB,$DE,$E1,$E4,$E7,$EA,$ED,$F0,$F4,$F7,$FA,$FD

View File

@ -0,0 +1,91 @@
;---------------------------------------------------------------------
; hgr_clear_screen -- zero one 8K hires page with unrolled stores
;
; In:	HGR_PAGE = $40 selects $4000-$5FFF, any other value
;	selects $2000-$3FFF
; Out:	A = 0, Y = 0
;
; Each pass of the loop writes one byte into each of the 32
; 256-byte pages of the buffer; 256 passes cover all 8192 bytes.
; The .repeat blocks expand to the same 32 "sta abs,Y" instructions
; that would otherwise be written out by hand.
;---------------------------------------------------------------------
hgr_clear_screen:
	lda	#0			; color
	ldy	HGR_PAGE
	cpy	#$40
	beq	hgr_page2_clearscreen

hgr_page1_clearscreen:
	ldy	#0
hgr_page1_cls_loop:
	.repeat	32, cls_pg
	sta	$2000+(cls_pg*$100),Y	; $2000,Y $2100,Y ... $3F00,Y
	.endrepeat
	iny
	bne	hgr_page1_cls_loop	; until Y wraps back to 0
	rts

hgr_page2_clearscreen:
	ldy	#0
hgr_page2_cls_loop:
	.repeat	32, cls_pg
	sta	$4000+(cls_pg*$100),Y	; $4000,Y $4100,Y ... $5F00,Y
	.endrepeat
	iny
	bne	hgr_page2_cls_loop	; until Y wraps back to 0
	rts

View File

@ -0,0 +1,97 @@
;div7_table = $b800
;mod7_table = $b900
;hposn_high = $ba00
;hposn_low = $bb00
;---------------------------------------------------------------------
; hgr_make_tables -- build the lookup tables at run time
;
; Falls through from the /7 and %7 tables into the code that follows.
; This part fills
;	div7_table[n] = n / 7	for n = 0..255
;	mod7_table[n] = n % 7	for n = 0..255
;---------------------------------------------------------------------
hgr_make_tables:

	;=====================
	; make /7 %7 tables
	;=====================
hgr_make_7_tables:

	; quotient table: bump the quotient after every 7th store
	ldy	#0			; Y = n
	tya				; A = current quotient (0)
	ldx	#7			; X = stores left at this quotient
@div7_store:
	sta	div7_table,Y
	dex
	bne	@div7_next
	clc				; carry state is unknown here
	adc	#1
	ldx	#7
@div7_next:
	iny
	bne	@div7_store		; until Y wraps back to 0

	; remainder table: X cycles 0,1,...,6,0,1,...
	ldy	#0			; Y = n
	ldx	#0			; X = current remainder
@mod7_store:
	txa
	sta	mod7_table,Y
	inx
	cpx	#7
	bne	@mod7_next
	ldx	#0
@mod7_next:
	iny
	bne	@mod7_store		; until Y wraps back to 0
; Hposn table
; hposn_low, hposn_high will each be filled with $C0 bytes
; (one entry per hires row 0..191), plus 17 extra entries, see below
; based on routine by John Brooks
; posted on comp.sys.apple2 on 2018-07-11
; https://groups.google.com/d/msg/comp.sys.apple2/v2HOfHOmeNQ/zD76fJg_BAAJ
; clobbers A,X
; preserves Y (this part only; the /7 %7 loops that precede it use Y)
; vmw note: version I was using based on applesoft HPOSN was ~64 bytes
; this one is 37 bytes
;
; For row number X the stored values work out to
;	hposn_low  = ((row>>3)&1)*$80 + (row>>6)*$28
;	hposn_high = $20 + (row&7)*4 + ((row>>4)&3)
; i.e. the high byte already contains the $20 base of hires page 1.
; NOTE(review): a caller that adds a full page value ($20/$40) on top
; of hposn_high would end up one page too high -- confirm what the
; plot code is expected to add.
build_hposn_tables:
ldx #0
btmi:
; keep row bits 7..3; for each of the top two bits that is set, OR in 5
; before shifting so that $28 (or $50) accumulates in the low byte
txa
and #$F8
bpl btpl1
ora #5
btpl1:
asl
bpl btpl2
ora #5
btpl2:
asl
asl
; the last ASL left row bit 5 in carry; it is picked up by the ROL below
sta hposn_low, X
; high byte: row&7, then rotate in the two bits carried out of the low byte
txa
and #7
rol
asl hposn_low, X
rol
ora #$20
sta hposn_high, X
inx
cpx #$C0
bne btmi
; go 16 beyond, which allows our text scrolling routine
; X runs 16 down to 0 inclusive, so 17 entries (192..208) are written:
; copies of rows 0..16 with the high byte EORed with $60
; (turns the $20-$3F page-1 values into $40-$5F)
ldx #16
extra_table_loop:
lda hposn_low,X
sta hposn_low+192,X
lda hposn_high,X
eor #$60
sta hposn_high+192,X
dex
bpl extra_table_loop
rts