bubble: optimize more

This commit is contained in:
Vince Weaver 2024-01-02 16:20:43 -05:00
parent 2675fcbd73
commit 1a6d39d63f
5 changed files with 1087 additions and 71 deletions

View File

@ -7,12 +7,13 @@ EMPTY_DISK = ../../../empty_disk
all: bubble.dsk make_table
bubble.dsk: HELLO BUBBLE.BAS BUBBLE BUBBLE_ORIG DIAMOND
bubble.dsk: HELLO BUBBLE.BAS BUBBLE BUBBLE_ORIG BUBBLE_ROLLED DIAMOND
cp $(EMPTY_DISK)/empty.dsk bubble.dsk
$(DOS33) -y bubble.dsk SAVE A HELLO
$(DOS33) -y bubble.dsk SAVE A BUBBLE.BAS
$(DOS33) -y bubble.dsk BSAVE -a 0x0C00 BUBBLE
$(DOS33) -y bubble.dsk BSAVE -a 0x0C00 BUBBLE_ORIG
$(DOS33) -y bubble.dsk BSAVE -a 0x0C00 BUBBLE_ROLLED
$(DOS33) -y bubble.dsk BSAVE -a 0x0C00 DIAMOND
###
@ -25,7 +26,7 @@ HELLO: hello.bas
BUBBLE: bubble.o
ld65 -o BUBBLE bubble.o -C $(LINKER_SCRIPTS)/apple2_c00.inc
bubble.o: bubble.s hgr_clear_screen.s
bubble.o: bubble.s hgr_clear_part.s sin_unrolled.s
ca65 -o bubble.o bubble.s -l bubble.lst
###
@ -36,6 +37,15 @@ BUBBLE_ORIG: bubble_orig.o
bubble_orig.o: bubble_orig.s
ca65 -o bubble_orig.o bubble_orig.s -l bubble_orig.lst
###
BUBBLE_ROLLED: bubble_rolled.o
ld65 -o BUBBLE_ROLLED bubble_rolled.o -C $(LINKER_SCRIPTS)/apple2_c00.inc
bubble_rolled.o: bubble_rolled.s
ca65 -o bubble_rolled.o bubble_rolled.s -l bubble_rolled.lst
###
@ -63,4 +73,5 @@ make_table.o: make_table.c
###
clean:
rm -f *~ *.o *.lst BUBBLE.BAS HELLO BUBBLE DIAMOND BUBBLE_ORIG
rm -f *~ *.o *.lst BUBBLE.BAS HELLO BUBBLE \
DIAMOND BUBBLE_ORIG BUBBLE_ROLLED

View File

@ -10,8 +10,8 @@
; after fast graphics
; D7E77 = 884343 = 1.1fps
; DD06E = ?? (made J countdown, why longer?)
; D7E77(??) = 884343 = 1.1fps
; DD06E = (made J countdown, why longer?)
; DB584 = destructive U when plotting
; D57A2 = rotate right instead of left for HPLOT *32 (U)
; D1D53 = same but for V
@ -19,8 +19,12 @@
; AB2FC = optimize sine, keep H value in accumulator = 1.4fps
; A9A38 = optimize cosine slightly
; A50BF = use lookup table for sine sign (takes 256 more bytes)
; 9F673 = clear screen, only clear X region we use
; 9DD73 = clear screen, only clear Y region we use
; 906FE = inline/unroll the sines
; TODO: inline/unroll sine/cosine calls
; soft-switches
KEYPRESS = $C000
@ -89,6 +93,7 @@ bubble:
;=======================
; init graphics
jsr HGR
jsr HGR2
;=======================
@ -112,15 +117,18 @@ next_frame:
;===========================
; "fast" clear screen
; inline to save 12 cycles
; FIXME: inline to save 12 cycles
jsr hgr_clear_screen
jsr hgr_clear_part
; FIXME: see value of X after clear
ldx #0 ; 2
stx I ; 3
outer_loop:
;===========================================================
; setup R*I to inner loop
; save NUM*4 (128) cycles at expense of 11 cycles
@ -128,6 +136,8 @@ outer_loop:
lda rl,X ; 4
sta rl_smc+1 ; 4
; countdown NUM times
ldx #NUM ; 2
stx J ; 3
@ -143,10 +153,17 @@ rl_smc:
lda #0 ; R*I ; 2
adc XL ; 3
sta RXL ; 3
; 8 cycles to always add 0 for high byte
; since we're also copying can't save time by branching if no carry
; 3 if no carry / 2+5=7 if carry for branch/check
lda #0 ; 2
adc XH ; 3
sta RXH ; 3
no_rl_carry:
; fixed_add(i,0,vh,vl,&ivh,&ivl);
; precalc I+V for later use
@ -161,17 +178,45 @@ rl_smc:
; U=SIN(I+V)+SIN(RR+X)
ldy #0
jsr sin
ldy #2
jsr sin
; ldy #0
lda IVL ; 3
sta STEMP1L ; 3
lda IVH ; 3
; jsr sin
.include "sin_unrolled.s"
lda sin_table_low,X ; 4
sta OUT1L ; 3
lda sin_table_high,X ; 4
sta OUT1H ; 3
; ldy #2
lda RXL ; 3
sta STEMP1L ; 3
lda RXH ; 3
; jsr sin
.include "sin_unrolled.s"
; lda sin_table_low,X ; 4
; sta OUT1L ; 3
; lda sin_table_high,X ; 4
; sta OUT1H ; 3
clc
lda OUT1L
adc OUT2L
adc sin_table_low,X
sta UL
lda OUT1H
adc OUT2H
adc sin_table_high,X
sta UH
; V=COS(I+V)+COS(RR+X)
@ -269,23 +314,25 @@ rl_smc:
; bpl inner_loop
done_j:
inc I
lda I
cmp #NUM
beq done_i
jmp outer_loop
inc I ; 5
lda I ; 3
cmp #NUM ; 2
beq done_i ; 2/3
jmp outer_loop ; 3
done_i:
; t=t+(1.0/32.0);
; 1/2 1/4 1/8 1/16 | 1/32 1/64 1/128 1/256
; $0x08
; TODO: is CLC necessary? (bcs=bge, bcc=blt)
; carry always set here
; carry always set here as we got here from a BEQ
; (bcs=bge, bcc=blt)
clc ; 2
; clc ; 2
lda TL ; 3
adc #$8 ; 2
; adc #$8 ; 2
adc #$7 ; really 8, carry always set ; 2
sta TL ; 3
lda #0 ; 2
adc TH ; 3
@ -360,29 +407,14 @@ already_loaded:
; sl=fsinh[i];
; TODO: tradeoff size for speed by having lookup
; tradeoff size for speed by having lookup
; table for sign bits
; the sign lookup only saves like 2 cycles
lda sin_table_low,X
sta OUT1L,Y
lda sin_table_high,X
sta OUT1H,Y
.if 0
lda sin_lookup,X ; 4+
asl ; 2
lda sin_table_low,X ; 4+
sta OUT1L,Y ; 5
bcs sin_negative ; 2/3
sin_positive:
lda #$0 ; 2
beq set_sin_sign ; bra ; 3
sin_negative:
lda #$FF ; 2
set_sin_sign:
lda sin_table_high,X ; 4+
sta OUT1H,Y ; 5
.endif
rts ; 6
@ -402,36 +434,7 @@ cos:
jmp already_loaded ; 3
rl:
.byte $00,$06,$0C,$12,$19,$1F,$25,$2B
.byte $32,$38,$3E,$45,$4B,$51,$57,$5E
.byte $64,$6A,$71,$77,$7D,$83,$8A,$90
.byte $96,$9D,$A3,$A9,$AF,$B6,$BC,$C2
.if 0
sin_lookup:
.byte $00,$03,$06,$09,$0C,$0F,$12,$15,$18,$1C,$1F,$22,$25,$28,$2B,$2E
.byte $30,$33,$36,$39,$3C,$3F,$41,$44,$47,$49,$4C,$4E,$51,$53,$55,$58
.byte $5A,$5C,$5E,$60,$62,$64,$66,$68,$6A,$6C,$6D,$6F,$70,$72,$73,$74
.byte $76,$77,$78,$79,$7A,$7B,$7C,$7C,$7D,$7E,$7E,$7F,$7F,$7F,$7F,$7F
.byte $7F,$7F,$7F,$7F,$7F,$7F,$7E,$7E,$7D,$7C,$7C,$7B,$7A,$79,$78,$77
.byte $76,$75,$73,$72,$70,$6F,$6D,$6C,$6A,$68,$66,$64,$63,$61,$5E,$5C
.byte $5A,$58,$56,$53,$51,$4E,$4C,$49,$47,$44,$41,$3F,$3C,$39,$36,$34
.byte $31,$2E,$2B,$28,$25,$22,$1F,$1C,$19,$16,$12,$0F,$0C,$09,$06,$03
.byte $00,$FE,$FA,$F7,$F4,$F1,$EE,$EB,$E8,$E5,$E2,$DF,$DC,$D9,$D6,$D3
.byte $D0,$CD,$CA,$C7,$C4,$C2,$BF,$BC,$BA,$B7,$B4,$B2,$AF,$AD,$AB,$A8
.byte $A6,$A4,$A2,$A0,$9E,$9C,$9A,$98,$96,$95,$93,$91,$90,$8E,$8D,$8C
.byte $8A,$89,$88,$87,$86,$85,$84,$84,$83,$82,$82,$81,$81,$81,$81,$81
.byte $81,$81,$81,$81,$81,$81,$82,$82,$83,$84,$84,$85,$86,$87,$88,$89
.byte $8A,$8B,$8D,$8E,$8F,$91,$93,$94,$96,$98,$99,$9B,$9D,$9F,$A1,$A4
.byte $A6,$A8,$AA,$AD,$AF,$B1,$B4,$B6,$B9,$BC,$BE,$C1,$C4,$C7,$C9,$CC
.byte $CF,$D2,$D5,$D8,$DB,$DE,$E1,$E4,$E7,$EA,$ED,$F0,$F4,$F7,$FA,$FD
.endif
log_lookup:
.byte $81,$82,$84,$88,$90,$A0,$C0,$80
.include "hgr_clear_screen.s"
.include "hgr_clear_part.s"
.include "hgr_table.s"
.align $100
@ -469,3 +472,13 @@ sin_table_high:
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
rl:
.byte $00,$06,$0C,$12,$19,$1F,$25,$2B
.byte $32,$38,$3E,$45,$4B,$51,$57,$5E
.byte $64,$6A,$71,$77,$7D,$83,$8A,$90
.byte $96,$9D,$A3,$A9,$AF,$B6,$BC,$C2
log_lookup:
.byte $81,$82,$84,$88,$90,$A0,$C0,$80

View File

@ -0,0 +1,455 @@
; bubble universe -- Apple II Hires
; original = 612 bytes
; clear screen:
; bkgnd0 = $44198 = 278936 cycles = max ~4fps
; new: $A616 = 42518 = max ~22fps
; hplot
; hplot0 = ($14E-$15C) $14E = 334 * 1024 = 342016 = max ~3fps
; lookup = 46 * 1024 = 47104 = max ~21fps
; after fast graphics
; D7E77(??) = 884343 = 1.1fps
; DD06E = (made J countdown, why longer?)
; DB584 = destructive U when plotting
; D57A2 = rotate right instead of left for HPLOT *32 (U)
; D1D53 = same but for V
; C2679 = optimize sine, don't care about bottom byte in addition
; AB2FC = optimize sine, keep H value in accumulator = 1.4fps
; A9A38 = optimize cosine slightly
; A50BF = use lookup table for sine sign (takes 256 more bytes)
; 9F673 = clear screen, only clear X region we use
; 9DD73 = clear screen, only clear Y region we use
; TODO: inline/unroll sine/cosine calls
; soft-switches
KEYPRESS = $C000
KEYRESET = $C010
PAGE1 = $C054
PAGE2 = $C055
; ROM routines
BKGND0 = $F3F4 ; clear current page to A
HGR2 = $F3D8 ; set hires page2 and clear $4000-$5fff
HGR = $F3E2 ; set hires page1 and clear $2000-$3fff
HPLOT0 = $F457 ; plot at (Y,X), (A)
HLINRL = $F530 ; line to (X,A), (Y)
HCOLOR1 = $F6F0 ; set HGR_COLOR to value in X
COLORTBL = $F6F6
WAIT = $FCA8 ; delay 1/2(26+27A+5A^2) us
; zero page
GBASL = $26
GBASH = $27
HPLOTXL = $90
HPLOTXH = $91
HPLOTYL = $92
HPLOTYH = $93
IVL = $94
IVH = $95
RXL = $96
RXH = $97
OUT1L = $98
OUT1H = $99
OUT2L = $9A
OUT2H = $9B
STEMP1L = $9C
STEMP1H = $9D
STEMP2L = $9E
STEMP2H = $9F
I = $D0
J = $D1
XL = $D4
XH = $D5
VL = $D6
VH = $D7
TL = $DA
TH = $DB
UL = $DC
UH = $DD
HGR_PAGE = $E6
; const
NUM = 32
bubble:
; Program entry point: one-time setup, then falls through into the
; per-frame loop at next_frame.  Never returns.
;========================
; setup lookup tables
; (hgr_make_tables comes from an included file not shown here; the plot
;  code later indexes hposn_low/hposn_high/div7_table/mod7_table,
;  presumably the tables it builds -- TODO confirm)
jsr hgr_make_tables
;=======================
; init graphics
; HGR clears page1 ($2000-$3fff), HGR2 then clears page2 ($4000-$5fff)
; and leaves page2 selected, so both pages start out blank
jsr HGR
jsr HGR2
;=======================
; init variables
; X, V and T are 16-bit 8.8 fixed point values, all start at 0
lda #0
sta XL
sta XH
sta VL
sta VH
sta TL
sta TH
;=========================
;=========================
; main loop
;=========================
;=========================
; next_frame: top of the per-frame loop, re-entered from the page flip code
; outer_loop: top of the I loop (I counts up from 0, tested at done_j)
next_frame:
;===========================
; "fast" clear screen
; FIXME: inline to save 12 cycles
jsr hgr_clear_part
; FIXME: see value of X after clear
; (hgr_clear_part only uses A and Y, so X has to be loaded here)
ldx #0 ; 2
stx I ; 3
outer_loop:
;===========================================================
; setup R*I to inner loop
; save NUM*4 (128) cycles at expense of 11 cycles
; self-modifying code: rl[I] is patched into the operand byte of the
; "lda #0" at rl_smc so the inner loop gets it as an immediate
ldx I ; 3
lda rl,X ; 4
sta rl_smc+1 ; 4
; countdown NUM times
ldx #NUM ; 2
stx J ; 3
inner_loop:
; One pass of the inner (J) loop: advance the X/V state by one step and
; plot one pixel on the current draw page.
;	in:	J  = passes remaining (loaded with NUM before the first pass)
;		I  = current outer loop index
;		rl_smc+1 already patched with rl[I]
;	uses:	A, X, Y, RXL/RXH, IVL/IVH, OUT1*/OUT2*, UL/UH, HPLOTYL, GBASL/GBASH
;	update:	XL/XH, VL/VH (carried over into the next pass)

; fixed_add(rh[i],rl[i],xh,xl,&rxh,&rxl);
; note: rh is always 0
; pre-calc (R*I)+X for later use
clc ; 2
rl_smc:
lda #0 ; R*I ; 2	; operand is self-modified at outer_loop
adc XL ; 3
sta RXL ; 3
; 8 cycles to always add 0 for high byte
; since we're also copying can't save time by branching if no carry
; 3 if no carry / 2+5=7 if carry for branch/check
lda #0 ; 2
adc XH ; 3
sta RXH ; 3
no_rl_carry:
; fixed_add(i,0,vh,vl,&ivh,&ivl);
; precalc I+V for later use
; this is 8.8 fixed point so bottom byte of I is 0
; clc ; C should be 0 from prev ;
; NOTE(review): the adc XH above only leaves C set when XH=$FF and the
; low byte add carried, in which case IVH comes out one too high
lda VL ; 3
sta IVL ; 3
lda I ; 3
adc VH ; 3
sta IVH ; 3
; U=SIN(I+V)+SIN(RR+X)
; Y=0 -> sin reads IV, writes OUT1 / Y=2 -> sin reads RX, writes OUT2
ldy #0
jsr sin
ldy #2
jsr sin
clc
lda OUT1L
adc OUT2L
sta UL
lda OUT1H
adc OUT2H
sta UH
; V=COS(I+V)+COS(RR+X)
ldy #0
jsr cos
ldy #2
jsr cos
clc
lda OUT1L
adc OUT2L
sta VL
lda OUT1H
adc OUT2H
sta VH
; X=U+T
clc ; 2
lda UL ; 3
adc TL ; 3
sta XL ; 3
lda UH ; 3
adc TH ; 3
sta XH ; 3
; HPLOT 32*U+140,32*V+96
; U can be destroyed as we don't use it again?
; 01234567 89ABCDEF
; 56789ABC DEF00000
; we want 56789ABC, rotate right by 3 is two iterations faster?
lda UL ; 3
lsr UH ; 5
ror ; 2
lsr UH ; 5
ror ; 2
lsr UH ; 5
ror ; 2
clc ; 2
adc #140 ; 2
tax ; 2
; calculate Ypos
; same shift as for U, but done on a copy of VH (HPLOTYL used as
; scratch) because V itself is needed again on the next pass
lda VH
sta HPLOTYL
lda VL
lsr HPLOTYL
ror
lsr HPLOTYL
ror
lsr HPLOTYL
ror
clc
adc #96
; "fast" hplot, Xpos in X, Ypos in A
tay ; 2
lda hposn_low,Y ; 4
sta GBASL ; 3
clc ; 2
lda hposn_high,Y ; 4
adc HGR_PAGE ; 3
sta GBASH ; 3
; 21
ldy div7_table,X ; 4
lda mod7_table,X ; 4
tax ; 2
; 31
lda (GBASL),Y ; 5
ora log_lookup,X ; 4
sta (GBASL),Y ; 6
; 46
; J was loaded with NUM, so leave once it reaches 0: exactly NUM passes.
; (Exiting on BMI instead only left after J wrapped to $FF, i.e. NUM+1
;  passes per outer iteration, one more than the outer loop's NUM.)
dec J ; 5
beq done_j ; 2/3
jmp inner_loop ; 3
; bne inner_loop	; presumably out of branch range, hence the jmp
done_j:
; end of one inner loop: step I and repeat outer_loop until I == NUM
inc I ; 5
lda I ; 3
cmp #NUM ; 2
beq done_i ; 2/3
jmp outer_loop ; 3
done_i:
; end of one frame: advance the time value T (8.8 fixed point)
; t=t+(1.0/32.0);
; 1/2 1/4 1/8 1/16 | 1/32 1/64 1/128 1/256
; 1/32 in 8.8 fixed point = $08
; carry always set here as we got here from a BEQ
; (the cmp #NUM above compared equal, and CMP sets C when A >= operand)
; (bcs=bge, bcc=blt)
; clc ; 2
lda TL ; 3
; adc #$8 ; 2
adc #$7 ; really 8, carry always set ; 2
sta TL ; 3
; propagate the carry from the low byte into TH
lda #0 ; 2
adc TH ; 3
sta TH ; 3
end:
; flip pages
; HGR_PAGE holds the high byte of the page being drawn to ($20 or $40);
; eor #$60 toggles between the two.  The page that was just drawn
; gets displayed while the other one becomes the new draw page.
; if $20 (draw PAGE1) draw PAGE2, SHOW page1
; if $40 (draw PAGE2) draw PAGE1, SHOW page2
lda HGR_PAGE
eor #$60
sta HGR_PAGE
cmp #$40
bne flip2
flip1:
; new draw page is page2 ($40): display page1
bit PAGE1
jmp next_frame
flip2:
; new draw page is page1 ($20): display page2
bit PAGE2
jmp next_frame
;=======================
sin:
; Table-lookup sine of a 16-bit 8.8 fixed point angle.
;	in:	Y = 0 : angle in IVL/IVH, result to OUT1L/OUT1H
;		Y = 2 : angle in RXL/RXH, result to OUT2L/OUT2H
;		(works because RX* and OUT2* sit 2 bytes after IV* and OUT1*)
;	out:	OUT1L,Y / OUT1H,Y = 16-bit entry from sin_table_low/high
;	uses:	A, X, STEMP1L, STEMP1H, STEMP2L (Y is preserved)
; cos enters at already_loaded with angle low byte in STEMP1L, high in A

; / 6.28 is roughly the same as *0.16
; = .5 .25 .125 .0625 .03125
; 1/6.28 = 0.16 = 0 0 1 0 1 0 0 0 = 0x28
; i=(i*0x28)>>8;
lda IVL,Y ; note, uses absolute as no ZP equiv ; 4
sta STEMP1L ; 3
lda IVH,Y ; 4
already_loaded:
; A has STEMP1H
; i2=i<<3;
asl STEMP1L ; 5
rol ; 2
asl STEMP1L ; 5
rol ; 2
asl STEMP1L ; 5
rol ; 2
; i1=i<<5;
; continue shifting a copy of the low byte; high byte of i2 is parked
; in STEMP1H while A goes on to become the high byte of i1
ldx STEMP1L ; 3
stx STEMP2L ; 3
sta STEMP1H ; 3
asl STEMP2L ; 5
rol ; 2
asl STEMP2L ; 5
rol ; 2
; i=(i1+i2)>>8;
; We ignore the low byte as we don't need it
; possibly inaccurate as we don't clear carry?
; (C here is the bit the last rol shifted out of A, not a carry from
;  adding the low bytes)
adc STEMP1H ; 3
tax ; 2
; sl=fsinh[i];
; tradeoff size for speed by having lookup
; table for sign bits
; the sign lookup only saves like 2 cycles
lda sin_table_low,X ; 4+
sta OUT1L,Y ; 5
lda sin_table_high,X ; 4+
sta OUT1H,Y ; 5
rts ; 6
;=============================
cos:
; Cosine via sine: adds $0192 to the 8.8 angle and jumps into the
; middle of sin.  Same Y convention, outputs and clobbers as sin.
;	in:	Y = 0 : angle in IVL/IVH, result to OUT1L/OUT1H
;		Y = 2 : angle in RXL/RXH, result to OUT2L/OUT2H
; 1.57 is roughly 0x0192 in 8.8
clc ; 2
lda IVL,Y ; 4
adc #$92 ; 2
sta STEMP1L ; 3
lda IVH,Y ; 4
adc #1 ; 2
; sta STEMP1H ; 3
; high byte stays in A, which is what already_loaded expects
jmp already_loaded ; 3
.include "hgr_clear_part.s"
.include "hgr_table.s"
.align $100
sin_table_low:
.byte $00,$06,$0C,$12,$19,$1F,$25,$2B,$31,$38,$3E,$44,$4A,$50,$56,$5C
.byte $61,$67,$6D,$73,$78,$7E,$83,$88,$8E,$93,$98,$9D,$A2,$A7,$AB,$B0
.byte $B4,$B9,$BD,$C1,$C5,$C9,$CD,$D1,$D4,$D8,$DB,$DE,$E1,$E4,$E7,$E9
.byte $EC,$EE,$F0,$F3,$F4,$F6,$F8,$F9,$FB,$FC,$FD,$FE,$FE,$FF,$FF,$FF
.byte $FF,$FF,$FF,$FF,$FE,$FE,$FD,$FC,$FB,$F9,$F8,$F6,$F5,$F3,$F1,$EE
.byte $EC,$EA,$E7,$E4,$E1,$DE,$DB,$D8,$D5,$D1,$CD,$C9,$C6,$C2,$BD,$B9
.byte $B5,$B0,$AC,$A7,$A2,$9D,$98,$93,$8E,$89,$83,$7E,$78,$73,$6D,$68
.byte $62,$5C,$56,$50,$4A,$44,$3E,$38,$32,$2C,$25,$1F,$19,$13,$0C,$06
.byte $00,$FB,$F4,$EE,$E8,$E2,$DB,$D5,$CF,$C9,$C3,$BD,$B7,$B1,$AB,$A5
.byte $9F,$99,$93,$8E,$88,$83,$7D,$78,$73,$6D,$68,$63,$5E,$5A,$55,$50
.byte $4C,$47,$43,$3F,$3B,$37,$33,$30,$2C,$29,$25,$22,$1F,$1C,$19,$17
.byte $14,$12,$10,$0E,$0C,$0A,$08,$07,$06,$04,$03,$02,$02,$01,$01,$01
.byte $01,$01,$01,$01,$02,$02,$03,$04,$05,$07,$08,$0A,$0B,$0D,$0F,$11
.byte $14,$16,$19,$1C,$1E,$21,$25,$28,$2B,$2F,$32,$36,$3A,$3E,$42,$47
.byte $4B,$4F,$54,$59,$5E,$62,$67,$6C,$72,$77,$7C,$82,$87,$8D,$92,$98
.byte $9E,$A4,$AA,$AF,$B5,$BB,$C2,$C8,$CE,$D4,$DA,$E0,$E7,$ED,$F3,$F9
sin_table_high:
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
rl:
.byte $00,$06,$0C,$12,$19,$1F,$25,$2B
.byte $32,$38,$3E,$45,$4B,$51,$57,$5E
.byte $64,$6A,$71,$77,$7D,$83,$8A,$90
.byte $96,$9D,$A3,$A9,$AF,$B6,$BC,$C2
log_lookup:
.byte $81,$82,$84,$88,$90,$A0,$C0,$80

View File

@ -0,0 +1,480 @@
; we only draw to
; Xpositions 140+-64 (76-204, /7 = byte columns 10-29; the loops stop at 30)
; Ypositions 96+-64 (the rows cleared below are 32-159)
hgr_clear_part:
; Clear only the window of the hires page that gets drawn to, on
; whichever page HGR_PAGE currently selects ($40 = page2, else page1).
;	in:	HGR_PAGE
;	uses:	A (left as 0), Y (X is not touched)
lda #0 ; color
ldy HGR_PAGE
cpy #$40
bne hgr_page1_clearscreen
; page2 code sits beyond the unrolled page1 loop, too far for a branch
jmp hgr_page2_clearscreen
hgr_page1_clearscreen:
; Clear byte columns 10..29 of rows 32..159 on hires page1.
; Expects A = fill byte (0 from hgr_clear_part).  Each pass of the loop
; writes one byte column (Y) of every enabled row, one sta per row;
; the trailing comment on each sta is the screen row it writes.
; Rows 0..31 and 160..191 are assembled out with .if 0.
ldy #10
hgr_page1_cls_loop:
.if 0
sta $2000,Y ; 0
sta $2400,Y ; 1
sta $2800,Y ; 2
sta $2C00,Y ; 3
sta $3000,Y ; 4
sta $3400,Y ; 5
sta $3800,Y ; 6
sta $3C00,Y ; 7
sta $2080,Y ; 8
sta $2480,Y ; 9
sta $2880,Y ; 10
sta $2C80,Y ; 11
sta $3080,Y ; 12
sta $3480,Y ; 13
sta $3880,Y ; 14
sta $3C80,Y ; 15
sta $2100,Y ; 16
sta $2500,Y ; 17
sta $2900,Y ; 18
sta $2D00,Y ; 19
sta $3100,Y ; 20
sta $3500,Y ; 21
sta $3900,Y ; 22
sta $3D00,Y ; 23
sta $2180,Y ; 24
sta $2580,Y ; 25
sta $2980,Y ; 26
sta $2D80,Y ; 27
sta $3180,Y ; 28
sta $3580,Y ; 29
sta $3980,Y ; 30
sta $3D80,Y ; 31
.endif
sta $2200,Y ; 32
sta $2600,Y ; 33
sta $2A00,Y ; 34
sta $2E00,Y ; 35
sta $3200,Y ; 36
sta $3600,Y ; 37
sta $3A00,Y ; 38
sta $3E00,Y ; 39
sta $2280,Y ; 40
sta $2680,Y ; 41
sta $2A80,Y ; 42
sta $2E80,Y ; 43
sta $3280,Y ; 44
sta $3680,Y ; 45
sta $3A80,Y ; 46
sta $3E80,Y ; 47
sta $2300,Y ; 48
sta $2700,Y ; 49
sta $2B00,Y ; 50
sta $2F00,Y ; 51
sta $3300,Y ; 52
sta $3700,Y ; 53
sta $3B00,Y ; 54
sta $3F00,Y ; 55
sta $2380,Y ; 56
sta $2780,Y ; 57
sta $2B80,Y ; 58
sta $2F80,Y ; 59
sta $3380,Y ; 60
sta $3780,Y ; 61
sta $3B80,Y ; 62
sta $3F80,Y ; 63
;=======
sta $2028,Y ; 64
sta $2428,Y ; 65
sta $2828,Y ; 66
sta $2C28,Y ; 67
sta $3028,Y ; 68
sta $3428,Y ; 69
sta $3828,Y ; 70
sta $3C28,Y ; 71
sta $20A8,Y ; 72
sta $24A8,Y ; 73
sta $28A8,Y ; 74
sta $2CA8,Y ; 75
sta $30A8,Y ; 76
sta $34A8,Y ; 77
sta $38A8,Y ; 78
sta $3CA8,Y ; 79
sta $2128,Y ; 80
sta $2528,Y ; 81
sta $2928,Y ; 82
sta $2D28,Y ; 83
sta $3128,Y ; 84
sta $3528,Y ; 85
sta $3928,Y ; 86
sta $3D28,Y ; 87
sta $21A8,Y ; 88
sta $25A8,Y ; 89
sta $29A8,Y ; 90
sta $2DA8,Y ; 91
sta $31A8,Y ; 92
sta $35A8,Y ; 93
sta $39A8,Y ; 94
sta $3DA8,Y ; 95
sta $2228,Y ; 96
sta $2628,Y ; 97
sta $2A28,Y ; 98
sta $2E28,Y ; 99
sta $3228,Y ; 100
sta $3628,Y ; 101
sta $3A28,Y ; 102
sta $3E28,Y ; 103
sta $22A8,Y ; 104
sta $26A8,Y ; 105
sta $2AA8,Y ; 106
sta $2EA8,Y ; 107
sta $32A8,Y ; 108
sta $36A8,Y ; 109
sta $3AA8,Y ; 110
sta $3EA8,Y ; 111
sta $2328,Y ; 112
sta $2728,Y ; 113
sta $2B28,Y ; 114
sta $2F28,Y ; 115
sta $3328,Y ; 116
sta $3728,Y ; 117
sta $3B28,Y ; 118
sta $3F28,Y ; 119
sta $23A8,Y ; 120
sta $27A8,Y ; 121
sta $2BA8,Y ; 122
sta $2FA8,Y ; 123
sta $33A8,Y ; 124
sta $37A8,Y ; 125
sta $3BA8,Y ; 126
sta $3FA8,Y ; 127
;=========
sta $2050,Y ; 128
sta $2450,Y ; 129
sta $2850,Y ; 130
sta $2C50,Y ; 131
sta $3050,Y ; 132
sta $3450,Y ; 133
sta $3850,Y ; 134
sta $3C50,Y ; 135
sta $20D0,Y ; 136
sta $24D0,Y ; 137
sta $28D0,Y ; 138
sta $2CD0,Y ; 139
sta $30D0,Y ; 140
sta $34D0,Y ; 141
sta $38D0,Y ; 142
sta $3CD0,Y ; 143
sta $2150,Y ; 144
sta $2550,Y ; 145
sta $2950,Y ; 146
sta $2D50,Y ; 147
sta $3150,Y ; 148
sta $3550,Y ; 149
sta $3950,Y ; 150
sta $3D50,Y ; 151
sta $21D0,Y ; 152
sta $25D0,Y ; 153
sta $29D0,Y ; 154
sta $2DD0,Y ; 155
sta $31D0,Y ; 156
sta $35D0,Y ; 157
sta $39D0,Y ; 158
sta $3DD0,Y ; 159
.if 0
sta $2250,Y ; 160
sta $2650,Y ; 161
sta $2A50,Y ; 162
sta $2E50,Y ; 163
sta $3250,Y ; 164
sta $3650,Y ; 165
sta $3A50,Y ; 166
sta $3E50,Y ; 167
sta $22D0,Y ; 168
sta $26D0,Y ; 169
sta $2AD0,Y ; 170
sta $2ED0,Y ; 171
sta $32D0,Y ; 172
sta $36D0,Y ; 173
sta $3AD0,Y ; 174
sta $3ED0,Y ; 175
sta $2350,Y ; 176
sta $2750,Y ; 177
sta $2B50,Y ; 178
sta $2F50,Y ; 179
sta $3350,Y ; 180
sta $3750,Y ; 181
sta $3B50,Y ; 182
sta $3F50,Y ; 183
sta $23D0,Y ; 184
sta $27D0,Y ; 185
sta $2BD0,Y ; 186
sta $2FD0,Y ; 187
sta $33D0,Y ; 188
sta $37D0,Y ; 189
sta $3BD0,Y ; 190
sta $3FD0,Y ; 191
.endif
; next byte column; stop once column 29 has been cleared
iny
cpy #30
beq hgr_page1_cls_done
; loop body is far larger than a branch can span, so jmp back
jmp hgr_page1_cls_loop
hgr_page1_cls_done:
rts
hgr_page2_clearscreen:
; Clear byte columns 10..29 of rows 32..159 on hires page2.
; Expects A = fill byte (0 from hgr_clear_part).  Each pass of the loop
; writes one byte column (Y) of every enabled row, one sta per row;
; the trailing comment on each sta is the screen row it writes.
; Rows 0..31 and 160..191 are assembled out with .if 0.
ldy #10
hgr_page2_cls_loop:
.if 0
sta $4000,Y ; 0
sta $4400,Y ; 1
sta $4800,Y ; 2
sta $4C00,Y ; 3
sta $5000,Y ; 4
sta $5400,Y ; 5
sta $5800,Y ; 6
sta $5C00,Y ; 7
sta $4080,Y ; 8
sta $4480,Y ; 9
sta $4880,Y ; 10
sta $4C80,Y ; 11
sta $5080,Y ; 12
sta $5480,Y ; 13
sta $5880,Y ; 14
sta $5C80,Y ; 15
sta $4100,Y ; 16
sta $4500,Y ; 17
sta $4900,Y ; 18
sta $4D00,Y ; 19
sta $5100,Y ; 20
sta $5500,Y ; 21
sta $5900,Y ; 22
sta $5D00,Y ; 23
sta $4180,Y ; 24
sta $4580,Y ; 25
sta $4980,Y ; 26
sta $4D80,Y ; 27
sta $5180,Y ; 28
sta $5580,Y ; 29
sta $5980,Y ; 30
sta $5D80,Y ; 31
.endif
sta $4200,Y ; 32
sta $4600,Y ; 33
sta $4A00,Y ; 34
sta $4E00,Y ; 35
sta $5200,Y ; 36
sta $5600,Y ; 37
sta $5A00,Y ; 38
sta $5E00,Y ; 39
sta $4280,Y ; 40
sta $4680,Y ; 41
sta $4A80,Y ; 42
sta $4E80,Y ; 43
sta $5280,Y ; 44
sta $5680,Y ; 45
sta $5A80,Y ; 46
sta $5E80,Y ; 47
sta $4300,Y ; 48
sta $4700,Y ; 49
sta $4B00,Y ; 50
sta $4F00,Y ; 51
sta $5300,Y ; 52
sta $5700,Y ; 53
sta $5B00,Y ; 54
sta $5F00,Y ; 55
sta $4380,Y ; 56
sta $4780,Y ; 57
sta $4B80,Y ; 58
sta $4F80,Y ; 59
sta $5380,Y ; 60
sta $5780,Y ; 61
sta $5B80,Y ; 62
sta $5F80,Y ; 63
;=======
sta $4028,Y ; 64
sta $4428,Y ; 65
sta $4828,Y ; 66
sta $4C28,Y ; 67
sta $5028,Y ; 68
sta $5428,Y ; 69
sta $5828,Y ; 70
sta $5C28,Y ; 71
sta $40A8,Y ; 72
sta $44A8,Y ; 73
sta $48A8,Y ; 74
sta $4CA8,Y ; 75
sta $50A8,Y ; 76
sta $54A8,Y ; 77
sta $58A8,Y ; 78
sta $5CA8,Y ; 79
sta $4128,Y ; 80
sta $4528,Y ; 81
sta $4928,Y ; 82
sta $4D28,Y ; 83
sta $5128,Y ; 84
sta $5528,Y ; 85
sta $5928,Y ; 86
sta $5D28,Y ; 87
sta $41A8,Y ; 88
sta $45A8,Y ; 89
sta $49A8,Y ; 90
sta $4DA8,Y ; 91
sta $51A8,Y ; 92
sta $55A8,Y ; 93
sta $59A8,Y ; 94
sta $5DA8,Y ; 95
sta $4228,Y ; 96
sta $4628,Y ; 97
sta $4A28,Y ; 98
sta $4E28,Y ; 99
sta $5228,Y ; 100
sta $5628,Y ; 101
sta $5A28,Y ; 102
sta $5E28,Y ; 103
sta $42A8,Y ; 104
sta $46A8,Y ; 105
sta $4AA8,Y ; 106
sta $4EA8,Y ; 107
sta $52A8,Y ; 108
sta $56A8,Y ; 109
sta $5AA8,Y ; 110
sta $5EA8,Y ; 111
sta $4328,Y ; 112
sta $4728,Y ; 113
sta $4B28,Y ; 114
sta $4F28,Y ; 115
sta $5328,Y ; 116
sta $5728,Y ; 117
sta $5B28,Y ; 118
sta $5F28,Y ; 119
sta $43A8,Y ; 120
sta $47A8,Y ; 121
sta $4BA8,Y ; 122
sta $4FA8,Y ; 123
sta $53A8,Y ; 124
sta $57A8,Y ; 125
sta $5BA8,Y ; 126
sta $5FA8,Y ; 127
;=========
sta $4050,Y ; 128
sta $4450,Y ; 129
sta $4850,Y ; 130
sta $4C50,Y ; 131
sta $5050,Y ; 132
sta $5450,Y ; 133
sta $5850,Y ; 134
sta $5C50,Y ; 135
sta $40D0,Y ; 136
sta $44D0,Y ; 137
sta $48D0,Y ; 138
sta $4CD0,Y ; 139
sta $50D0,Y ; 140
sta $54D0,Y ; 141
sta $58D0,Y ; 142
sta $5CD0,Y ; 143
sta $4150,Y ; 144
sta $4550,Y ; 145
sta $4950,Y ; 146
sta $4D50,Y ; 147
sta $5150,Y ; 148
sta $5550,Y ; 149
sta $5950,Y ; 150
sta $5D50,Y ; 151
sta $41D0,Y ; 152
sta $45D0,Y ; 153
sta $49D0,Y ; 154
sta $4DD0,Y ; 155
sta $51D0,Y ; 156
sta $55D0,Y ; 157
sta $59D0,Y ; 158
sta $5DD0,Y ; 159
.if 0
sta $4250,Y ; 160
sta $4650,Y ; 161
sta $4A50,Y ; 162
sta $4E50,Y ; 163
sta $5250,Y ; 164
sta $5650,Y ; 165
sta $5A50,Y ; 166
sta $5E50,Y ; 167
sta $42D0,Y ; 168
sta $46D0,Y ; 169
sta $4AD0,Y ; 170
sta $4ED0,Y ; 171
sta $52D0,Y ; 172
sta $56D0,Y ; 173
sta $5AD0,Y ; 174
sta $5ED0,Y ; 175
sta $4350,Y ; 176
sta $4750,Y ; 177
sta $4B50,Y ; 178
sta $4F50,Y ; 179
sta $5350,Y ; 180
sta $5750,Y ; 181
sta $5B50,Y ; 182
sta $5F50,Y ; 183
sta $43D0,Y ; 184
sta $47D0,Y ; 185
sta $4BD0,Y ; 186
sta $4FD0,Y ; 187
sta $53D0,Y ; 188
sta $57D0,Y ; 189
sta $5BD0,Y ; 190
sta $5FD0,Y ; 191
.endif
; next byte column; stop once column 29 has been cleared
iny
cpy #30
beq done_hgr_page2_cls
; loop body is far larger than a branch can span, so jmp back
jmp hgr_page2_cls_loop
done_hgr_page2_cls:
rts

View File

@ -0,0 +1,57 @@
;=======================
; Inline (label-free) copy of the sine index calculation, meant to be
; pulled in with .include; it defines no labels, so it can be included
; more than once in the same file.
;	in:	STEMP1L = low byte of the 8.8 angle, A = high byte
;	out:	X (and A) = index for sin_table_low / sin_table_high
;	uses:	STEMP1L, STEMP1H, STEMP2L
; The loads before and the table reads after are left to the including
; code; the subroutine versions are kept below as comments.
;sin:
; / 6.28 is roughly the same as *0.16
; = .5 .25 .125 .0625 .03125
; 1/6.28 = 0.16 = 0 0 1 0 1 0 0 0 = 0x28
; i=(i*0x28)>>8;
; lda IVL,Y ; note, uses absolute as no ZP equiv ; 4
; sta STEMP1L ; 3
; lda IVH,Y ; 4
; A has STEMP1H
; i2=i<<3;
asl STEMP1L ; 5
rol ; 2
asl STEMP1L ; 5
rol ; 2
asl STEMP1L ; 5
rol ; 2
; i1=i<<5;
; keep shifting a copy of the low byte; high byte of i2 is parked in
; STEMP1H while A goes on to become the high byte of i1
ldx STEMP1L ; 3
stx STEMP2L ; 3
sta STEMP1H ; 3
asl STEMP2L ; 5
rol ; 2
asl STEMP2L ; 5
rol ; 2
; i=(i1+i2)>>8;
; We ignore the low byte as we don't need it
; possibly inaccurate as we don't clear carry?
; (C here is the bit the last rol shifted out of A)
adc STEMP1H ; 3
tax ; 2
; sl=fsinh[i];
; tradeoff size for speed by having lookup
; table for sign bits
; the sign lookup only saves like 2 cycles
; lda sin_table_low,X ; 4+
; sta OUT1L,Y ; 5
; lda sin_table_high,X ; 4+
; sta OUT1H,Y ; 5