mirror of
https://github.com/deater/dos33fsprogs.git
synced 2024-06-12 11:29:37 +00:00
bubble: one more optimization round
This commit is contained in:
parent
d0fda8e44d
commit
de0c366e75
|
@ -25,7 +25,13 @@
|
||||||
; 817BE = inline/unroll the cosines
|
; 817BE = inline/unroll the cosines
|
||||||
; 817A7 = inline clear screen (now no stack usage)
|
; 817A7 = inline clear screen (now no stack usage)
|
||||||
; 64987 = calc sine and cosine at same time = 2.5fps
|
; 64987 = calc sine and cosine at same time = 2.5fps
|
||||||
|
; 61827 = remove unnecessary stores
|
||||||
|
; 5D7EF = add a cosine table
|
||||||
|
; 5CFBE = move TL/TH out of zero page
|
||||||
|
; 5C353 = put UL in Y = ~2.6fps
|
||||||
|
|
||||||
|
; NUM=24 35CD3 = ~4.5fps
|
||||||
|
; NUM=16 1A2DD = ~9 fps
|
||||||
|
|
||||||
; soft-switches
|
; soft-switches
|
||||||
|
|
||||||
|
@ -51,39 +57,32 @@ GBASL = $26
|
||||||
GBASH = $27
|
GBASH = $27
|
||||||
|
|
||||||
|
|
||||||
HPLOTXL = $90
|
|
||||||
HPLOTXH = $91
|
|
||||||
HPLOTYL = $92
|
HPLOTYL = $92
|
||||||
HPLOTYH = $93
|
|
||||||
IVL = $94
|
|
||||||
IVH = $95
|
|
||||||
RXL = $96
|
RXL = $96
|
||||||
RXH = $97
|
RXH = $97
|
||||||
OUT1L = $98
|
STEMP1L = $98
|
||||||
OUT1H = $99
|
STEMP1H = $99
|
||||||
OUT2L = $9A
|
STEMP2L = $9A
|
||||||
OUT2H = $9B
|
|
||||||
STEMP1L = $9C
|
|
||||||
STEMP1H = $9D
|
|
||||||
STEMP2L = $9E
|
|
||||||
STEMP2H = $9F
|
|
||||||
|
|
||||||
I = $D0
|
I = $D0
|
||||||
J = $D1
|
J = $D1
|
||||||
XL = $D4
|
XL = $D2
|
||||||
XH = $D5
|
XH = $D3
|
||||||
VL = $D6
|
VL = $D4
|
||||||
VH = $D7
|
VH = $D5
|
||||||
TL = $DA
|
;TL = $D6
|
||||||
TH = $DB
|
;TH = $D7
|
||||||
UL = $DC
|
UL = $D8
|
||||||
UH = $DD
|
UH = $D9
|
||||||
|
|
||||||
HGR_PAGE = $E6
|
HGR_PAGE = $E6
|
||||||
|
|
||||||
; const
|
; const
|
||||||
|
|
||||||
NUM = 32
|
;NUM = 32
|
||||||
|
NUM = 24
|
||||||
|
|
||||||
bubble:
|
bubble:
|
||||||
|
|
||||||
|
@ -106,8 +105,8 @@ bubble:
|
||||||
sta XH
|
sta XH
|
||||||
sta VL
|
sta VL
|
||||||
sta VH
|
sta VH
|
||||||
sta TL
|
; sta TL
|
||||||
sta TH
|
; sta TH
|
||||||
|
|
||||||
;=========================
|
;=========================
|
||||||
;=========================
|
;=========================
|
||||||
|
@ -164,114 +163,133 @@ rl_smc:
|
||||||
adc XH ; 3
|
adc XH ; 3
|
||||||
sta RXH ; 3
|
sta RXH ; 3
|
||||||
|
|
||||||
no_rl_carry:
|
;=======================================
|
||||||
|
|
||||||
; fixed_add(i,0,vh,vl,&ivh,&ivl);
|
|
||||||
|
|
||||||
; precalc I+V for later use
|
; precalc I+V for later use
|
||||||
|
; IV=I+V
|
||||||
; this is 8.8 fixed point so bottom byte of I is 0
|
; this is 8.8 fixed point so bottom byte of I is 0
|
||||||
|
|
||||||
; clc ; C should be 0 from prev ;
|
; clc ; C should be 0 from prev ;
|
||||||
lda VL ; 3
|
lda VL ; 3
|
||||||
sta IVL ; 3
|
sta STEMP1L ; store IVL directly in sine input ; 3
|
||||||
lda I ; 3
|
lda I ; 3
|
||||||
adc VH ; 3
|
adc VH ; IVH is in A for sine ; 3
|
||||||
sta IVH ; 3
|
|
||||||
|
|
||||||
;=========================
|
;=========================
|
||||||
; U=SIN(I+V)+SIN(RR+X)
|
; U=SIN(I+V)+SIN(RR+X)
|
||||||
; V=COS(I+V)+COS(RR+X)
|
; V=COS(I+V)+COS(RR+X)
|
||||||
|
|
||||||
lda IVL ; 3
|
; calc SIN(I+V) in STEMP1L/A
|
||||||
sta STEMP1L ; 3
|
|
||||||
lda IVH ; 3
|
|
||||||
|
|
||||||
.include "sin_unrolled.s"
|
; / 6.28 is roughly the same as *0.16
|
||||||
|
; = .5 .25 .125 .0625 .03125
|
||||||
|
; 1/6.28 = 0.16 = 0 0 1 0 1 0 0 0 = 0x28
|
||||||
|
|
||||||
|
; i=(i*0x28)>>8;
|
||||||
|
|
||||||
|
; A has STEMP1H
|
||||||
|
|
||||||
|
|
||||||
|
; 01234567 89ABCDEF
|
||||||
|
|
||||||
|
; 3456789A
|
||||||
|
; + 56789ABC
|
||||||
|
;=============
|
||||||
|
|
||||||
|
; i2=i<<3;
|
||||||
|
|
||||||
|
asl STEMP1L ; 5
|
||||||
|
rol ; 2
|
||||||
|
asl STEMP1L ; 5
|
||||||
|
rol ; 2
|
||||||
|
asl STEMP1L ; 5
|
||||||
|
rol ; 2
|
||||||
|
|
||||||
|
; i1=i<<5;
|
||||||
|
|
||||||
|
ldx STEMP1L ; 3
|
||||||
|
stx STEMP2L ; 3
|
||||||
|
sta STEMP1H ; 3
|
||||||
|
|
||||||
|
|
||||||
|
asl STEMP2L ; 5
|
||||||
|
rol ; 2
|
||||||
|
asl STEMP2L ; 5
|
||||||
|
rol ; 2
|
||||||
|
|
||||||
|
; i=(i1+i2)>>8;
|
||||||
|
|
||||||
|
; We ignore the low byte as we don't need it
|
||||||
|
; possibly inaccurate as we don't clear carry?
|
||||||
|
|
||||||
|
adc STEMP1H ; 2
|
||||||
|
tax ; 2
|
||||||
|
|
||||||
|
; tradeoff size for speed by having lookup
|
||||||
|
; table for sign bits
|
||||||
|
; the sign lookup only saves like 2 cycles
|
||||||
|
|
||||||
|
|
||||||
|
;==========================
|
||||||
|
; U=sin(IV)
|
||||||
|
|
||||||
lda sin_table_low,X ; 4
|
lda sin_table_low,X ; 4
|
||||||
sta UL ; 3
|
sta UL ; 3
|
||||||
lda sin_table_high,X ; 4
|
lda sin_table_high,X ; 4
|
||||||
sta UH ; 3
|
sta UH ; 3
|
||||||
|
|
||||||
txa
|
;===========================
|
||||||
clc
|
; V=cos(IV)
|
||||||
adc #64
|
|
||||||
tax
|
|
||||||
|
|
||||||
lda sin_table_low,X ; 4
|
lda cos_table_low,X ; 4
|
||||||
sta VL ; 3
|
sta VL ; 3
|
||||||
lda sin_table_high,X ; 4
|
lda cos_table_high,X ; 4
|
||||||
sta VH ; 3
|
sta VH ; 3
|
||||||
|
|
||||||
|
|
||||||
lda RXL ; 3
|
lda RXL ; 3
|
||||||
sta STEMP1L ; 3
|
sta STEMP1L ; 3
|
||||||
lda RXH ; 3
|
lda RXH ; 3
|
||||||
|
|
||||||
.include "sin_unrolled.s"
|
.include "sin_unrolled.s"
|
||||||
|
|
||||||
clc
|
;=====================
|
||||||
lda UL
|
; U+=sin(RX)
|
||||||
adc sin_table_low,X
|
|
||||||
sta UL
|
|
||||||
lda UH
|
|
||||||
adc sin_table_high,X
|
|
||||||
sta UH
|
|
||||||
|
|
||||||
txa
|
|
||||||
clc
|
|
||||||
adc #64
|
|
||||||
tax
|
|
||||||
|
|
||||||
.if 0
|
|
||||||
; 1.57 is roughly 0x0192 in 8.8
|
|
||||||
clc ; 2
|
clc ; 2
|
||||||
lda IVL ; 3
|
lda UL ; 3
|
||||||
adc #$92 ; 2
|
adc sin_table_low,X ; 4
|
||||||
sta STEMP1L ; 3
|
tay ; UL in Y ; 2
|
||||||
|
; sta UL ; 3
|
||||||
|
lda UH ; 3
|
||||||
|
adc sin_table_high,X ; 4
|
||||||
|
sta UH ; 3
|
||||||
|
|
||||||
lda IVH ; 4
|
;================
|
||||||
adc #1 ; 2
|
; V+=cos(RX)
|
||||||
|
|
||||||
.include "sin_unrolled.s"
|
|
||||||
lda sin_table_low,X ; 4
|
|
||||||
sta OUT1L ; 3
|
|
||||||
lda sin_table_high,X ; 4
|
|
||||||
sta OUT1H ; 3
|
|
||||||
|
|
||||||
|
|
||||||
; 1.57 is roughly 0x0192 in 8.8
|
|
||||||
clc ; 2
|
clc ; 2
|
||||||
lda RXL ; 3
|
lda VL ; 3
|
||||||
adc #$92 ; 2
|
adc cos_table_low,X ; 4+
|
||||||
sta STEMP1L ; 3
|
sta VL ; 3
|
||||||
lda RXH ; 3
|
lda VH ; 3
|
||||||
adc #1 ; 2
|
adc cos_table_high,X ; 4+
|
||||||
|
sta VH ; 3
|
||||||
.include "sin_unrolled.s"
|
|
||||||
.endif
|
|
||||||
|
|
||||||
clc
|
|
||||||
lda VL
|
|
||||||
adc sin_table_low,X
|
|
||||||
sta VL
|
|
||||||
lda VH
|
|
||||||
adc sin_table_high,X
|
|
||||||
sta VH
|
|
||||||
|
|
||||||
|
|
||||||
; X=U+T
|
; X=U+T
|
||||||
clc ; 2
|
clc ; 2
|
||||||
lda UL ; 3
|
; lda UL ; 3
|
||||||
adc TL ; 3
|
tya ; UL in Y ; 2
|
||||||
|
tl_smc:
|
||||||
|
adc #0 ; 2
|
||||||
sta XL ; 3
|
sta XL ; 3
|
||||||
lda UH ; 3
|
lda UH ; 3
|
||||||
adc TH ; 3
|
th_smc:
|
||||||
|
adc #0 ; 2
|
||||||
sta XH ; 3
|
sta XH ; 3
|
||||||
|
|
||||||
|
;===========================================================
|
||||||
; HPLOT 32*U+140,32*V+96
|
; HPLOT 32*U+140,32*V+96
|
||||||
|
|
||||||
; U can be destroyed as we don't use it again?
|
; U can be destroyed as we don't use it again
|
||||||
|
|
||||||
; 01234567 89ABCDEF
|
; 01234567 89ABCDEF
|
||||||
|
|
||||||
|
@ -279,7 +297,9 @@ no_rl_carry:
|
||||||
|
|
||||||
; we want 56789ABC, rotate right by 3 is two iterations faster?
|
; we want 56789ABC, rotate right by 3 is two iterations faster?
|
||||||
|
|
||||||
lda UL ; 3
|
tya ; UL in Y ; 2
|
||||||
|
|
||||||
|
; lda UL ; 3
|
||||||
|
|
||||||
lsr UH ; 5
|
lsr UH ; 5
|
||||||
ror ; 2
|
ror ; 2
|
||||||
|
@ -353,12 +373,12 @@ done_i:
|
||||||
; carry always set here as we got here from a BEQ
|
; carry always set here as we got here from a BEQ
|
||||||
; (bcs=bge, bcc=blt)
|
; (bcs=bge, bcc=blt)
|
||||||
|
|
||||||
lda TL ; 3
|
lda tl_smc+1 ; 4
|
||||||
adc #$7 ; really 8, carry always set ; 2
|
adc #$7 ; really 8, carry always set ; 2
|
||||||
sta TL ; 3
|
sta tl_smc+1 ; 4
|
||||||
lda #0 ; 2
|
lda #0 ; 2
|
||||||
adc TH ; 3
|
adc th_smc+1 ; 4
|
||||||
sta TH ; 3
|
sta th_smc+1 ; 4
|
||||||
|
|
||||||
end:
|
end:
|
||||||
; flip pages
|
; flip pages
|
||||||
|
@ -388,6 +408,7 @@ sin_table_low:
|
||||||
.byte $61,$67,$6D,$73,$78,$7E,$83,$88,$8E,$93,$98,$9D,$A2,$A7,$AB,$B0
|
.byte $61,$67,$6D,$73,$78,$7E,$83,$88,$8E,$93,$98,$9D,$A2,$A7,$AB,$B0
|
||||||
.byte $B4,$B9,$BD,$C1,$C5,$C9,$CD,$D1,$D4,$D8,$DB,$DE,$E1,$E4,$E7,$E9
|
.byte $B4,$B9,$BD,$C1,$C5,$C9,$CD,$D1,$D4,$D8,$DB,$DE,$E1,$E4,$E7,$E9
|
||||||
.byte $EC,$EE,$F0,$F3,$F4,$F6,$F8,$F9,$FB,$FC,$FD,$FE,$FE,$FF,$FF,$FF
|
.byte $EC,$EE,$F0,$F3,$F4,$F6,$F8,$F9,$FB,$FC,$FD,$FE,$FE,$FF,$FF,$FF
|
||||||
|
cos_table_low:
|
||||||
.byte $FF,$FF,$FF,$FF,$FE,$FE,$FD,$FC,$FB,$F9,$F8,$F6,$F5,$F3,$F1,$EE
|
.byte $FF,$FF,$FF,$FF,$FE,$FE,$FD,$FC,$FB,$F9,$F8,$F6,$F5,$F3,$F1,$EE
|
||||||
.byte $EC,$EA,$E7,$E4,$E1,$DE,$DB,$D8,$D5,$D1,$CD,$C9,$C6,$C2,$BD,$B9
|
.byte $EC,$EA,$E7,$E4,$E1,$DE,$DB,$D8,$D5,$D1,$CD,$C9,$C6,$C2,$BD,$B9
|
||||||
.byte $B5,$B0,$AC,$A7,$A2,$9D,$98,$93,$8E,$89,$83,$7E,$78,$73,$6D,$68
|
.byte $B5,$B0,$AC,$A7,$A2,$9D,$98,$93,$8E,$89,$83,$7E,$78,$73,$6D,$68
|
||||||
|
@ -400,11 +421,20 @@ sin_table_low:
|
||||||
.byte $14,$16,$19,$1C,$1E,$21,$25,$28,$2B,$2F,$32,$36,$3A,$3E,$42,$47
|
.byte $14,$16,$19,$1C,$1E,$21,$25,$28,$2B,$2F,$32,$36,$3A,$3E,$42,$47
|
||||||
.byte $4B,$4F,$54,$59,$5E,$62,$67,$6C,$72,$77,$7C,$82,$87,$8D,$92,$98
|
.byte $4B,$4F,$54,$59,$5E,$62,$67,$6C,$72,$77,$7C,$82,$87,$8D,$92,$98
|
||||||
.byte $9E,$A4,$AA,$AF,$B5,$BB,$C2,$C8,$CE,$D4,$DA,$E0,$E7,$ED,$F3,$F9
|
.byte $9E,$A4,$AA,$AF,$B5,$BB,$C2,$C8,$CE,$D4,$DA,$E0,$E7,$ED,$F3,$F9
|
||||||
|
cos_table_low_tail:
|
||||||
|
.byte $00,$06,$0C,$12,$19,$1F,$25,$2B,$31,$38,$3E,$44,$4A,$50,$56,$5C
|
||||||
|
.byte $61,$67,$6D,$73,$78,$7E,$83,$88,$8E,$93,$98,$9D,$A2,$A7,$AB,$B0
|
||||||
|
.byte $B4,$B9,$BD,$C1,$C5,$C9,$CD,$D1,$D4,$D8,$DB,$DE,$E1,$E4,$E7,$E9
|
||||||
|
.byte $EC,$EE,$F0,$F3,$F4,$F6,$F8,$F9,$FB,$FC,$FD,$FE,$FE,$FF,$FF,$FF
|
||||||
|
|
||||||
|
.align $100
|
||||||
|
|
||||||
sin_table_high:
|
sin_table_high:
|
||||||
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
||||||
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
||||||
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
||||||
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
||||||
|
cos_table_high:
|
||||||
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
||||||
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
||||||
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
||||||
|
@ -417,6 +447,11 @@ sin_table_high:
|
||||||
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
|
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
|
||||||
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
|
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
|
||||||
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
|
.byte $FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF,$FF
|
||||||
|
cos_table_high_tail:
|
||||||
|
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
||||||
|
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
||||||
|
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
||||||
|
.byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
|
||||||
|
|
||||||
rl:
|
rl:
|
||||||
.byte $00,$06,$0C,$12,$19,$1F,$25,$2B
|
.byte $00,$06,$0C,$12,$19,$1F,$25,$2B
|
||||||
|
|
Loading…
Reference in New Issue
Block a user