bubble: more optimization

This commit is contained in:
Vince Weaver 2024-01-02 17:05:46 -05:00
parent 7216c531ff
commit 06ba6bfc22
3 changed files with 14 additions and 92 deletions

View File

@ -23,7 +23,9 @@
; 9DD73 = clear screen, only clear Y region we use
; 906FE = inline/unroll the sines
; 817BE = inline/unroll the cosines
; 817A7 = inline clear screen (now no stack usage)
; TODO: re-arrange order of U calculation
; TODO: use X for J counter (no stack so can use TXS/TSX)
; soft-switches
@ -117,11 +119,11 @@ next_frame:
;===========================
; "fast" clear screen
; FIXME: inline to save 12 cycles
jsr hgr_clear_part
.include "hgr_clear_part.s"
; FIXME: see value of X after clear
; FIXME: see value of X/Y/A after clear
ldx #0 ; 2
stx I ; 3
@ -182,7 +184,6 @@ no_rl_carry:
sta STEMP1L ; 3
lda IVH ; 3
; jsr sin
.include "sin_unrolled.s"
lda sin_table_low,X ; 4
@ -194,7 +195,6 @@ no_rl_carry:
sta STEMP1L ; 3
lda RXH ; 3
; jsr sin
.include "sin_unrolled.s"
clc
@ -216,7 +216,6 @@ no_rl_carry:
lda IVH ; 4
adc #1 ; 2
; jsr sin
.include "sin_unrolled.s"
lda sin_table_low,X ; 4
sta OUT1L ; 3
@ -231,7 +230,7 @@ no_rl_carry:
sta STEMP1L ; 3
lda RXH ; 3
adc #1 ; 2
; jsr cos
.include "sin_unrolled.s"
clc
@ -336,10 +335,7 @@ done_i:
; carry is always set here: we arrive via a BEQ, and the equal compare
; that feeds it (presumably a CMP/CPX/CPY just above) leaves carry set
; (bcs=bge, bcc=blt)
; clc ; 2
lda TL ; 3
; adc #$8 ; 2
adc #$7 ; really 8, carry always set ; 2
sta TL ; 3
lda #0 ; 2
@ -365,83 +361,7 @@ flip2:
bit PAGE2
jmp next_frame
.if 0
;=======================
; sin -- table-driven sine of a 16-bit value
;
; In: Y = offset applied to IVL/IVH (input) and OUT1L/OUT1H (output)
; Out: OUT1L,Y / OUT1H,Y = sin_table_low / sin_table_high entry
; Clobbers: A, X, flags, STEMP1L, STEMP1H, STEMP2L
;
; Alternate entry "already_loaded": caller has put the low byte in
; STEMP1L and the high byte in A (this is how cos gets here).
;
; NOTE: in this file the routine sits inside a ".if 0" block, so it is
; not assembled.
;=======================
sin:
; Dividing by 6.28 (2*pi) is roughly the same as multiplying by 0.16.
; Binary fraction bit weights: .5 .25 .125 .0625 .03125 ...
; 1/6.28 = 0.16 ~= %00101000 = 0x28 (0x28/256 = .125 + .03125 = 0.15625)
; so the table index is i=(i*0x28)>>8;
; and i*0x28 is computed below as (i<<5) + (i<<3).
lda IVL,Y ; note, uses absolute as the 6502 has no "lda zp,Y" form ; 4
sta STEMP1L ; 3
lda IVH,Y ; 4
already_loaded:
; 16-bit working value is A (high byte) : STEMP1L (low byte)
; i2=i<<3; three 16-bit left shifts
asl STEMP1L ; 5
rol ; 2
asl STEMP1L ; 5
rol ; 2
asl STEMP1L ; 5
rol ; 2
; i1=i<<5; keep i2's high byte in STEMP1H, then shift a copy
; (STEMP2L low, A high) two more places
ldx STEMP1L ; 3
stx STEMP2L ; 3
sta STEMP1H ; 3
asl STEMP2L ; 5
rol ; 2
asl STEMP2L ; 5
rol ; 2
; i=(i1+i2)>>8;
; Only the high bytes are added; the low bytes (and any carry they
; would produce) are ignored as we don't need that precision.
; Possibly inaccurate: carry is not cleared, so the bit shifted out
; by the last rol is added in as well.
adc STEMP1H ; 3 if STEMP1H is zero page like the sta above (was listed as 2)
tax ; 2
; sl=fsinh[i];
; tradeoff size for speed by having a separate lookup
; table for the high (sign) byte
; the sign lookup only saves like 2 cycles
lda sin_table_low,X ; 4+
sta OUT1L,Y ; 5
lda sin_table_high,X ; 4+
sta OUT1H,Y ; 5
rts ; 6
;=============================
; cos -- cosine via the sine routine: cos(x) = sin(x + pi/2)
;
; In: Y = offset applied to IVL/IVH (same convention as sin)
; Adds $0192 to the 16-bit value at IVL,Y / IVH,Y, leaving the low byte
; in STEMP1L and the high byte in A, then jumps into sin at
; already_loaded, which does the table lookup and the rts.
;=============================
cos:
; pi/2 = 1.57 is roughly 0x0192 in 8.8 fixed point
clc ; 2
lda IVL,Y ; 4
adc #$92 ; low byte of $0192 ; 2
sta STEMP1L ; 3
lda IVH,Y ; 4
adc #1 ; high byte of $0192, plus carry from the low byte ; 2
; not needed, already_loaded expects the high byte in A:
; sta STEMP1H ; 3
jmp already_loaded ; 3
.endif
.include "hgr_clear_part.s"
;.include "hgr_clear_part.s"
.include "hgr_table.s"
.align $100

View File

@ -406,6 +406,8 @@ cos:
.include "hgr_clear_part.s"
rts
.include "hgr_table.s"
.align $100

View File

@ -243,7 +243,7 @@ hgr_page1_cls_loop:
hgr_page1_cls_done:
rts
jmp hgr_page2_cls_done
hgr_page2_clearscreen:
@ -473,8 +473,8 @@ hgr_page2_cls_loop:
.endif
iny
cpy #30
beq done_hgr_page2_cls
beq hgr_page2_cls_done
jmp hgr_page2_cls_loop
done_hgr_page2_cls:
rts
hgr_page2_cls_done:
; rts