pt3: some more size optimization

This commit is contained in:
Vince Weaver
2019-05-22 17:08:52 -04:00
parent 4b54357b3a
commit a4e247fd58
2 changed files with 200 additions and 192 deletions

View File

@@ -19,8 +19,8 @@ Code Optimization
Decoder Type size ZP use raw decode total CPU overhead Decoder Type size ZP use raw decode total CPU overhead
------------------------------------------------------------- -------------------------------------------------------------
Original 3407 22B 1F.22 31s 171s 18% Original 3407 22B 1F.22 31s 171s 18%
VolTableGen 3302 22B 20.0E 32s 171s 18% VolTableGen 3302 22B 20.0E 32s 171s 19%
SizeOpts 3262 22B 20.0A 32s 171s 19%

View File

@@ -4,13 +4,13 @@
; ;
; by Vince Weaver <vince@deater.net> ; by Vince Weaver <vince@deater.net>
; Roughly based on the Pascal code from Ay_Emul ; Roughly based on the Formats.pas Pascal code from Ay_Emul
; Size Optimization -- Mem+Code (pt3_lib_end-note_a) ; Size Optimization -- Mem+Code (pt3_lib_end-note_a)
; + 3407 bytes -- original working implementation ; + 3407 bytes -- original working implementation
; + 3302 bytes -- autogenerate the volume tables ; + 3302 bytes -- autogenerate the volume tables
; + 3297 bytes -- remove some un-needed bytes from struct
; + 3262 bytes -- combine some duplicated code in $1X/$BX env setting
; TODO ; TODO
@@ -19,54 +19,51 @@
; ;
; Use memset to set things to 0? ; Use memset to set things to 0?
NOTE_WHICH =0 NOTE_VOLUME =0
NOTE_VOLUME =1 NOTE_TONE_SLIDING_L =1
NOTE_TONE_SLIDING_L =2 NOTE_TONE_SLIDING_H =2
NOTE_TONE_SLIDING_H =3 NOTE_ENABLED =3
NOTE_ENABLED =4 NOTE_ENVELOPE_ENABLED =4
NOTE_ENVELOPE_ENABLED =5 NOTE_SAMPLE_POINTER_L =5
NOTE_SAMPLE_POINTER_L =6 NOTE_SAMPLE_POINTER_H =6
NOTE_SAMPLE_POINTER_H =7 NOTE_SAMPLE_LOOP =7
NOTE_SAMPLE_LOOP =8 NOTE_SAMPLE_LENGTH =8
NOTE_SAMPLE_LENGTH =9 NOTE_TONE_L =9
NOTE_TONE_L =10 NOTE_TONE_H =10
NOTE_TONE_H =11 NOTE_AMPLITUDE =11
NOTE_AMPLITUDE =12 NOTE_NOTE =12
NOTE_NOTE =13 NOTE_LEN =13
NOTE_LEN =14 NOTE_LEN_COUNT =14
NOTE_LEN_COUNT =15 NOTE_NEW_NOTE =15
NOTE_SPEC_COMMAND =16 ; is this one needed? NOTE_ADDR_L =16
NOTE_NEW_NOTE =17 NOTE_ADDR_H =17
NOTE_ADDR_L =18 NOTE_ORNAMENT_POINTER_L =18
NOTE_ADDR_H =19 NOTE_ORNAMENT_POINTER_H =19
NOTE_ORNAMENT_POINTER_L =20 NOTE_ORNAMENT_LOOP =20
NOTE_ORNAMENT_POINTER_H =21 NOTE_ORNAMENT_LENGTH =21
NOTE_ORNAMENT_LOOP =22 NOTE_ONOFF =22
NOTE_ORNAMENT_LENGTH =23 NOTE_TONE_ACCUMULATOR_L =23
NOTE_ONOFF =24 NOTE_TONE_ACCUMULATOR_H =24
NOTE_TONE_ACCUMULATOR_L =25 NOTE_TONE_SLIDE_COUNT =25
NOTE_TONE_ACCUMULATOR_H =26 NOTE_ORNAMENT_POSITION =26
NOTE_TONE_SLIDE_COUNT =27 NOTE_SAMPLE_POSITION =27
NOTE_ORNAMENT_POSITION =28 NOTE_ENVELOPE_SLIDING =28
NOTE_SAMPLE_POSITION =29 NOTE_NOISE_SLIDING =29
NOTE_ENVELOPE_SLIDING =30 NOTE_AMPLITUDE_SLIDING =30
NOTE_NOISE_SLIDING =31 NOTE_ONOFF_DELAY =31
NOTE_AMPLITUDE_SLIDING =32 NOTE_OFFON_DELAY =32
NOTE_ONOFF_DELAY =33 NOTE_TONE_SLIDE_STEP_L =33
NOTE_OFFON_DELAY =34 NOTE_TONE_SLIDE_STEP_H =34
NOTE_TONE_SLIDE_STEP_L =35 NOTE_TONE_SLIDE_DELAY =35
NOTE_TONE_SLIDE_STEP_H =36 NOTE_SIMPLE_GLISS =36
NOTE_TONE_SLIDE_DELAY =37 NOTE_SLIDE_TO_NOTE =37
NOTE_SIMPLE_GLISS =38 NOTE_TONE_DELTA_L =38
NOTE_SLIDE_TO_NOTE =39 NOTE_TONE_DELTA_H =39
NOTE_TONE_DELTA_L =40 NOTE_TONE_SLIDE_TO_STEP =40
NOTE_TONE_DELTA_H =41
NOTE_TONE_SLIDE_TO_STEP =42
NOTE_STRUCT_SIZE=43 NOTE_STRUCT_SIZE=41
note_a: note_a:
.byte 'A' ; NOTE_WHICH
.byte $0 ; NOTE_VOLUME .byte $0 ; NOTE_VOLUME
.byte $0 ; NOTE_TONE_SLIDING_L .byte $0 ; NOTE_TONE_SLIDING_L
.byte $0 ; NOTE_TONE_SLIDING_H .byte $0 ; NOTE_TONE_SLIDING_H
@@ -82,7 +79,6 @@ note_a:
.byte $0 ; NOTE_NOTE .byte $0 ; NOTE_NOTE
.byte $0 ; NOTE_LEN .byte $0 ; NOTE_LEN
.byte $0 ; NOTE_LEN_COUNT .byte $0 ; NOTE_LEN_COUNT
.byte $0 ; NOTE_SPEC_COMMAND
.byte $0 ; NOTE_NEW_NOTE .byte $0 ; NOTE_NEW_NOTE
.byte $0 ; NOTE_ADDR_L .byte $0 ; NOTE_ADDR_L
.byte $0 ; NOTE_ADDR_H .byte $0 ; NOTE_ADDR_H
@@ -111,7 +107,6 @@ note_a:
.byte $0 ; NOTE_TONE_SLIDE_TO_STEP .byte $0 ; NOTE_TONE_SLIDE_TO_STEP
note_b: note_b:
.byte 'B' ; NOTE_WHICH
.byte $0 ; NOTE_VOLUME .byte $0 ; NOTE_VOLUME
.byte $0 ; NOTE_TONE_SLIDING_L .byte $0 ; NOTE_TONE_SLIDING_L
.byte $0 ; NOTE_TONE_SLIDING_H .byte $0 ; NOTE_TONE_SLIDING_H
@@ -127,7 +122,6 @@ note_b:
.byte $0 ; NOTE_NOTE .byte $0 ; NOTE_NOTE
.byte $0 ; NOTE_LEN .byte $0 ; NOTE_LEN
.byte $0 ; NOTE_LEN_COUNT .byte $0 ; NOTE_LEN_COUNT
.byte $0 ; NOTE_SPEC_COMMAND
.byte $0 ; NOTE_NEW_NOTE .byte $0 ; NOTE_NEW_NOTE
.byte $0 ; NOTE_ADDR_L .byte $0 ; NOTE_ADDR_L
.byte $0 ; NOTE_ADDR_H .byte $0 ; NOTE_ADDR_H
@@ -156,7 +150,6 @@ note_b:
.byte $0 ; NOTE_TONE_SLIDE_TO_STEP .byte $0 ; NOTE_TONE_SLIDE_TO_STEP
note_c: note_c:
.byte 'C' ; NOTE_WHICH
.byte $0 ; NOTE_VOLUME .byte $0 ; NOTE_VOLUME
.byte $0 ; NOTE_TONE_SLIDING_L .byte $0 ; NOTE_TONE_SLIDING_L
.byte $0 ; NOTE_TONE_SLIDING_H .byte $0 ; NOTE_TONE_SLIDING_H
@@ -172,7 +165,6 @@ note_c:
.byte $0 ; NOTE_NOTE .byte $0 ; NOTE_NOTE
.byte $0 ; NOTE_LEN .byte $0 ; NOTE_LEN
.byte $0 ; NOTE_LEN_COUNT .byte $0 ; NOTE_LEN_COUNT
.byte $0 ; NOTE_SPEC_COMMAND
.byte $0 ; NOTE_NEW_NOTE .byte $0 ; NOTE_NEW_NOTE
.byte $0 ; NOTE_ADDR_L .byte $0 ; NOTE_ADDR_L
.byte $0 ; NOTE_ADDR_H .byte $0 ; NOTE_ADDR_H
@@ -227,6 +219,7 @@ pt3_pattern_done: .byte $0
temp_word_l: .byte $0 temp_word_l: .byte $0
temp_word_h: .byte $0 temp_word_h: .byte $0
note_command: ; shared space with sample_b0
sample_b0: .byte $0 sample_b0: .byte $0
sample_b1: .byte $0 sample_b1: .byte $0
@@ -496,8 +489,8 @@ not_ascii_number:
; if (PlParams.PT3.PT3_Version <= 4) ; if (PlParams.PT3.PT3_Version <= 4)
lda pt3_version lda pt3_version ; 4
cmp #5 cmp #5 ; 2
; carry clear = 3.3/3.4 table ; carry clear = 3.3/3.4 table
; carry set = 3.5 table ; carry set = 3.5 table
@@ -547,32 +540,32 @@ note_enabled:
tay ; 2 tay ; 2
; b0 = pt3->data[a->sample_pointer + a->sample_position * 4]; ; b0 = pt3->data[a->sample_pointer + a->sample_position * 4];
lda (SAMPLE_L),Y lda (SAMPLE_L),Y ; 5+
sta sample_b0 sta sample_b0 ; 4
; b1 = pt3->data[a->sample_pointer + a->sample_position * 4 + 1]; ; b1 = pt3->data[a->sample_pointer + a->sample_position * 4 + 1];
iny iny ; 2
lda (SAMPLE_L),Y lda (SAMPLE_L),Y ; 5+
sta sample_b1 sta sample_b1 ; 4
; a->tone = pt3->data[a->sample_pointer + a->sample_position * 4 + 2]; ; a->tone = pt3->data[a->sample_pointer + a->sample_position*4+2];
; a->tone += (pt3->data[a->sample_pointer + a->sample_position * 4 + 3])<<8; ; a->tone+=(pt3->data[a->sample_pointer + a->sample_position*4+3])<<8;
iny iny ; 2
lda (SAMPLE_L),Y lda (SAMPLE_L),Y ; 5+
sta note_a+NOTE_TONE_L,X sta note_a+NOTE_TONE_L,X ; 4
iny iny ; 2
lda (SAMPLE_L),Y lda (SAMPLE_L),Y ; 5+
sta note_a+NOTE_TONE_H,X sta note_a+NOTE_TONE_H,X ; 4
; a->tone += a->tone_accumulator; ; a->tone += a->tone_accumulator;
clc clc ; 2
lda note_a+NOTE_TONE_L,X lda note_a+NOTE_TONE_L,X ; 4+
adc note_a+NOTE_TONE_ACCUMULATOR_L,X adc note_a+NOTE_TONE_ACCUMULATOR_L,X ; 4+
sta note_a+NOTE_TONE_L,X sta note_a+NOTE_TONE_L,X ; 4+
lda note_a+NOTE_TONE_H,X lda note_a+NOTE_TONE_H,X ; 4+
adc note_a+NOTE_TONE_ACCUMULATOR_H,X adc note_a+NOTE_TONE_ACCUMULATOR_H,X ; 4+
sta note_a+NOTE_TONE_H,X sta note_a+NOTE_TONE_H,X ; 4+
;============================= ;=============================
; Accumulate tone if set ; Accumulate tone if set
@@ -981,139 +974,137 @@ done_onoff:
spec_command: .byte $0
;===================================== ;=====================================
; Decode Note ; Decode Note
;===================================== ;=====================================
; X points to the note offset ; X points to the note offset
; Timings (from ===>)
; 00: 14+30
; 0X: 14+15
; 1X: 14+5+
; 2X: 14+5+
decode_note: decode_note:
; Init vars ; Init vars
lda #0 ; 2 lda #0 ; 2
sta note_a+NOTE_NEW_NOTE,X ; for printing notes? sta note_a+NOTE_NEW_NOTE,X ; for printing notes? ; 5
sta note_a+NOTE_SPEC_COMMAND,X ; These are only if printing? sta spec_command ; 4
sta decode_done sta decode_done ; 4
; Skip decode if note still running ; Skip decode if note still running
lda note_a+NOTE_LEN_COUNT,X lda note_a+NOTE_LEN_COUNT,X ; 4+
cmp #2 cmp #2 ; 2
bcc keep_decoding ; blt, assume not negative bcc keep_decoding ; blt, assume not negative ; 2/3
; we are still running, decrement and early return ; we are still running, decrement and early return
dec note_a+NOTE_LEN_COUNT,X dec note_a+NOTE_LEN_COUNT,X ; 7
rts rts ; 6
keep_decoding: keep_decoding:
lda note_a+NOTE_NOTE,X ; store prev note lda note_a+NOTE_NOTE,X ; store prev note ; 4+
sta prev_note sta prev_note ; 4
lda note_a+NOTE_TONE_SLIDING_H,X ; store prev sliding lda note_a+NOTE_TONE_SLIDING_H,X ; store prev sliding ; 4+
sta prev_sliding_h sta prev_sliding_h ; 4
lda note_a+NOTE_TONE_SLIDING_L,X lda note_a+NOTE_TONE_SLIDING_L,X ; 4+
sta prev_sliding_l sta prev_sliding_l ; 4
ldy #0 ; 2
ldy #0 ;============
; 26
note_decode_loop: note_decode_loop:
lda note_a+NOTE_LEN,X ; re-up length count lda note_a+NOTE_LEN,X ; re-up length count ; 4+
sta note_a+NOTE_LEN_COUNT,X sta note_a+NOTE_LEN_COUNT,X ; 5
lda note_a+NOTE_ADDR_L,X
sta PATTERN_L
lda note_a+NOTE_ADDR_H,X
sta PATTERN_H
lda note_a+NOTE_ADDR_L,X ; 4+
sta PATTERN_L ; 3
lda note_a+NOTE_ADDR_H,X ; 4+
sta PATTERN_H ; 3
;===>
; get next value ; get next value
lda (PATTERN_L),Y lda (PATTERN_L),Y ; 5+
sta sample_b0 ; save to X temporarily sta note_command ; save temporarily ; 4
; FIXME: use a jump table?? ; FIXME: use a jump table??
; further reflection, that would require 32-bytes of addresses
; in addition to needing X or Y to index the jump table. hmmm
and #$f0 and #$f0 ; 2
cmp #$00 ; cmp #$00
bne decode_case_1X bne decode_case_1X ; 2/3
;=============
; 14
decode_case_0X: decode_case_0X:
;============================== ;==============================
; $0X set special effect ; $0X set special effect
;============================== ;==============================
; -1
lda note_command ; 4
and #$f ; 2
lda sample_b0 ; we can always store spec as 0 means no spec
and #$f
bne decode_case_0X_not_zero
; 00 case
; means end of pattern
sta note_a+NOTE_LEN_COUNT,X ; len_count=0;
lda #1
sta decode_done
dec pt3_pattern_done
jmp done_decode
decode_case_0X_not_zero:
; FIXME: what if multiple spec commands? ; FIXME: what if multiple spec commands?
; Doesn't seem to happen in practice ; Doesn't seem to happen in practice
; But AY_emul has code to handle it ; But AY_emul has code to handle it
sta note_a+NOTE_SPEC_COMMAND,X sta spec_command ; 4
bne decode_case_0X_not_zero ; 2/3
;=============
; 12
; 00 case
; means end of pattern
; -1
sta note_a+NOTE_LEN_COUNT,X ; len_count=0; ; 5
lda #1 ; 2
sta decode_done ; 4+
dec pt3_pattern_done ; 6
decode_case_0X_not_zero:
jmp done_decode ; 3
jmp done_decode
decode_case_1X: decode_case_1X:
;============================== ;==============================
; $1X -- Set Envelope Type ; $1X -- Set Envelope Type
;============================== ;==============================
cmp #$10 cmp #$10 ; 2
bne decode_case_2X bne decode_case_2X ; 2/3
;============
; 5
lda sample_b0 ; -1
lda note_command ; 4
and #$0f and #$0f
bne decode_case_not_10 bne decode_case_not_10 ; 2/3
decode_case_10: decode_case_10:
sta note_a+NOTE_ENVELOPE_ENABLED,X ; 10 case - disable
jmp decode_case_1x_common sta note_a+NOTE_ENVELOPE_ENABLED,X ; A is 0 ; 5
beq decode_case_1x_common ; branch always ; 3
decode_case_not_10: decode_case_not_10:
; Needed?
; FIXME: combine this with the BX code somehow?
; pt3->envelope_type_old=0x78;
sta pt3_envelope_type jsr set_envelope ; 6+64
lda #$78
sta pt3_envelope_type_old
; get next byte
iny
lda (PATTERN_L),Y
sta pt3_envelope_period_h
iny
lda (PATTERN_L),Y
sta pt3_envelope_period_l
lda #0
sta pt3_envelope_delay ; envelope_delay=0
sta pt3_envelope_slide_l ; envelope_slide=0
sta pt3_envelope_slide_h
lda #1
sta note_a+NOTE_ENVELOPE_ENABLED,X ; envelope_enabled=1
decode_case_1x_common: decode_case_1x_common:
@@ -1135,7 +1126,7 @@ decode_case_2X:
cmp #$20 cmp #$20
bne decode_case_3X bne decode_case_3X
lda sample_b0 lda note_command
and #$f and #$f
sta pt3_noise_period sta pt3_noise_period
@@ -1148,7 +1139,7 @@ decode_case_3X:
cmp #$30 cmp #$30
bne decode_case_4X bne decode_case_4X
lda sample_b0 lda note_command
and #$0f and #$0f
ora #$10 ora #$10
sta pt3_noise_period sta pt3_noise_period
@@ -1162,7 +1153,7 @@ decode_case_4X:
cmp #$40 cmp #$40
bne decode_case_5X bne decode_case_5X
lda sample_b0 lda note_command
and #$0f ; set ornament to bottom nibble and #$0f ; set ornament to bottom nibble
jsr load_ornament jsr load_ornament
@@ -1178,7 +1169,7 @@ decode_case_5X:
cmp #$B0 cmp #$B0
bcs decode_case_bX ; branch greater/equal bcs decode_case_bX ; branch greater/equal
lda sample_b0 lda note_command
sec sec
sbc #$50 sbc #$50
sta note_a+NOTE_NOTE,X ; note=(current_val-0x50); sta note_a+NOTE_NOTE,X ; note=(current_val-0x50);
@@ -1209,7 +1200,7 @@ decode_case_bX:
cmp #$b0 ; FIXME: this cmp not needed, from before? cmp #$b0 ; FIXME: this cmp not needed, from before?
bne decode_case_cX bne decode_case_cX
lda sample_b0 lda note_command
and #$f and #$f
beq decode_case_b0 beq decode_case_b0
cmp #1 cmp #1
@@ -1237,34 +1228,11 @@ decode_case_b1:
decode_case_bx_higher: decode_case_bx_higher:
; give fake old to force update? maybe only needed if printing?
; pt3->envelope_type_old=0x78;
sec sec
sbc #1 sbc #1 ; envelope_type=(current_val&0xf)-1;
sta pt3_envelope_type ; envelope_type=(current_val&0xf)-1;
lda #$78 jsr set_envelope ; 6+64
sta pt3_envelope_type_old
; get next byte
iny
lda (PATTERN_L),Y
sta pt3_envelope_period_h
; get next byte
iny
lda (PATTERN_L),Y
sta pt3_envelope_period_l
lda #0
sta note_a+NOTE_ORNAMENT_POSITION,X ; ornament_position=0
sta pt3_envelope_slide_l ; envelope_slide=0
sta pt3_envelope_slide_h
sta pt3_envelope_delay ; envelope_delay=0
lda #1
sta note_a+NOTE_ENVELOPE_ENABLED,X ; envelope_enabled=1;
jmp done_decode jmp done_decode
decode_case_cX: decode_case_cX:
@@ -1274,7 +1242,7 @@ decode_case_cX:
cmp #$c0 cmp #$c0
bne decode_case_dX bne decode_case_dX
lda sample_b0 lda note_command
and #$0f and #$0f
bne decode_case_cx_not_c0 bne decode_case_cx_not_c0
@@ -1314,7 +1282,7 @@ decode_case_dX:
cmp #$d0 cmp #$d0
bne decode_case_eX bne decode_case_eX
lda sample_b0 lda note_command
and #$0f and #$0f
bne decode_case_dx_not_d0 bne decode_case_dx_not_d0
@@ -1337,7 +1305,7 @@ decode_case_eX:
cmp #$e0 cmp #$e0
bne decode_case_fX bne decode_case_fX
lda sample_b0 lda note_command
sec sec
sbc #$d0 sbc #$d0
jsr load_sample jsr load_sample
@@ -1354,7 +1322,7 @@ decode_case_fX:
sta note_a+NOTE_ENVELOPE_ENABLED,X sta note_a+NOTE_ENVELOPE_ENABLED,X
; Set ornament to low byte of command ; Set ornament to low byte of command
lda sample_b0 lda note_command
and #$f and #$f
jsr load_ornament ; ornament to load in A jsr load_ornament ; ornament to load in A
@@ -1385,7 +1353,7 @@ done_decode:
; In the same order they appear. We don't bother? ; In the same order they appear. We don't bother?
handle_effects: handle_effects:
lda note_a+NOTE_SPEC_COMMAND,X lda spec_command ; 4
;============================== ;==============================
; Effect #1 -- Tone Down ; Effect #1 -- Tone Down
@@ -1630,6 +1598,46 @@ no_effect:
rts rts
;=======================================
; set_envelope
;=======================================
; Shared tail of the $1X and $BX pattern commands: latches a new
; envelope type, reads the 16-bit envelope period that follows in the
; pattern data, clears the envelope slide/delay state, restarts the
; note's ornament and turns the envelope on for the note.
;
; In:	A = new envelope type
;	X = offset of the current note struct (indexes note_a+...)
;	Y = index into (PATTERN_L) of the last byte already consumed;
;	    the two bytes after it are read here
; Out:	A = 1
;	Y = Y + 2 (left on the second period byte)
;	X unchanged
; Time:	64 cycles including the rts; each (PATTERN_L),Y load costs
;	one more cycle if the indexing crosses a page boundary

set_envelope:
	sta	pt3_envelope_type			; 4

	; envelope period follows in the pattern, high byte first
	iny						; 2
	lda	(PATTERN_L),Y				; 5+
	sta	pt3_envelope_period_h			; 4
	iny						; 2
	lda	(PATTERN_L),Y				; 5+
	sta	pt3_envelope_period_l			; 4

	; reset everything that has to start from zero
	lda	#0					; 2
	sta	pt3_envelope_slide_h			; 4
	sta	pt3_envelope_slide_l	; envelope_slide=0	; 4
	sta	pt3_envelope_delay	; envelope_delay=0	; 4
	sta	note_a+NOTE_ORNAMENT_POSITION,X	; ornament_position=0 ; 5

	; pt3->envelope_type_old=0x78;
	; fake "old" value to force an update? maybe only needed if printing?
	lda	#$78					; 2
	sta	pt3_envelope_type_old			; 4

	; keep this store last so A still holds 1 on return
	lda	#1					; 2
	sta	note_a+NOTE_ENVELOPE_ENABLED,X	; envelope_enabled=1 ; 5

	rts						; 6
							;===========
							; 64
;===================================== ;=====================================
; Decode Line ; Decode Line
;===================================== ;=====================================