bubble: more speed optimization

This commit is contained in:
Vince Weaver 2024-01-02 02:06:18 -05:00
parent 028dda2869
commit 17630cded6

View File

@ -16,6 +16,10 @@
; D57A2 = rotate right instead of left for HPLOT *32 (U) ; D57A2 = rotate right instead of left for HPLOT *32 (U)
; D1D53 = same byte for V ; D1D53 = same byte for V
; C2679 = optimize sine, don't care about bottom byte in addition ; C2679 = optimize sine, don't care about bottom byte in addition
; AB2FC = optimize sine, keep H value in accumulator = 1.4fps
; A9A38 = optimize cosine slightly
; TODO: separate lookup table for sign
; TODO: inline/unroll sine/cosine calls
; soft-switches ; soft-switches
@ -235,25 +239,6 @@ rl_smc:
lsr HPLOTYL lsr HPLOTYL
ror ror
.if 0
lda VL
sta HPLOTYL
lda VH
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
asl HPLOTYL
rol
.endif
clc clc
adc #96 adc #96
@ -295,13 +280,16 @@ done_i:
; 1/2 1/4 1/8 1/16 | 1/32 1/64 1/128 1/256 ; 1/2 1/4 1/8 1/16 | 1/32 1/64 1/128 1/256
; $0x08 ; $0x08
clc ; TODO: is CLC necessary? (bcs=bge, bcc=blt)
lda TL ; carry always set here
adc #$8
sta TL clc ; 2
lda #0 lda TL ; 3
adc TH adc #$8 ; 2
sta TH sta TL ; 3
lda #0 ; 2
adc TH ; 3
sta TH ; 3
end: end:
; flip pages ; flip pages
@ -334,54 +322,47 @@ sin:
; i=(i*0x28)>>8; ; i=(i*0x28)>>8;
lda IVL,Y ; note, uses absolute as no ZP equiv ; 5 lda IVL,Y ; note, uses absolute as no ZP equiv ; 4
sta STEMP1L ; 3 sta STEMP1L ; 3
lda IVH,Y ; 5 lda IVH,Y ; 4
; sta STEMP1H ; 3
already_loaded: already_loaded:
; A has STEMP1H
; i2=i<<3; ; i2=i<<3;
; TODO: keep part in accumulator
asl STEMP1L ; 5 asl STEMP1L ; 5
; rol STEMP1H ; 5 rol ; 2
rol
asl STEMP1L ; 5 asl STEMP1L ; 5
; rol STEMP1H ; 5 rol ; 2
rol
asl STEMP1L ; 5 asl STEMP1L ; 5
; rol STEMP1H ; 5 rol ; 2
rol
; i1=i<<5; ; i1=i<<5;
ldx STEMP1L ; 3 ldx STEMP1L ; 3
stx STEMP2L ; 3 stx STEMP2L ; 3
; lda STEMP1H ; 3 sta STEMP1H ; 3
; sta STEMP2H ; 3
sta STEMP1H
asl STEMP2L ; 5 asl STEMP2L ; 5
; rol STEMP2H ; 5 rol ; 2
rol
asl STEMP2L ; 5 asl STEMP2L ; 5
; rol STEMP2H ; 5 rol ; 2
rol
; i=(i1+i2)>>8; ; i=(i1+i2)>>8;
; We ignore the low byte as we don't need it ; We ignore the low byte as we don't need it
; possibly inaccurate as we don't clear carry? ; possibly inaccurate as we don't clear carry?
adc STEMP1H adc STEMP1H ; 3
; lda STEMP1H ; 3
; adc STEMP2H ; 3
tax ; 2 tax ; 2
; sl=fsinh[i]; ; sl=fsinh[i];
; TODO: tradeoff size for speed by having lookup
; table for sign bits
lda sin_lookup,X ; 4+ lda sin_lookup,X ; 4+
asl ; 2 asl ; 2
sta OUT1L,Y ; 5 sta OUT1L,Y ; 5
@ -401,15 +382,16 @@ set_sin_sign:
; cos: cosine entry point that reuses the sine code path.
; Adds $0192 (about 1.57 in 8.8 fixed point, presumably pi/2) to the
; 16-bit value read from IVH,Y:IVL,Y, then jumps to already_loaded.
; On exit to already_loaded: STEMP1L holds the low byte of the sum and
; A holds the high byte. The updated version no longer stores the high
; byte to STEMP1H here (that store is commented out below).
; Y is used as the index and is not modified by this routine.
cos: cos:
; 1.57 is roughly 0x0192 in 8.8 ; 1.57 is roughly 0x0192 in 8.8
clc clc ; 2
lda IVL,Y lda IVL,Y ; 4
adc #$92 adc #$92 ; 2
sta STEMP1L sta STEMP1L ; 3
lda IVH,Y
adc #1
sta STEMP1H
jmp already_loaded lda IVH,Y ; 4
adc #1 ; 2
; sta STEMP1H ; 3
jmp already_loaded ; 3
rl: rl: