chiptune_player: implemented stepwise lz4

better, but still not acceptable
This commit is contained in:
Vince Weaver 2018-02-25 01:08:14 -05:00
parent 2ebf6c37e8
commit 62391aa514
8 changed files with 279 additions and 22 deletions

View File

@ -0,0 +1,219 @@
; Stepwise LZ4 data decompressor for Apple II
; Decodes a single token (one literal run + one match copy) per call, then returns
; Code by Peter Ferrie (qkumba) (peter.ferrie@gmail.com)
; "LZ4 unpacker in 143 bytes (6502 version) (2013)"
; http://pferrie.host22.com/misc/appleii.htm
; This is that code, but with comments and labels added for clarity.
; I also found a bug when decoding with runs of multiples of 256
; which has since been fixed upstream.
; For LZ4 reference see
; https://github.com/lz4/lz4/wiki/lz4_Frame_format.md
; LZ4 summary:
;
; HEADER:
; Should: check for magic number 04 22 4d 18
; FLG: 64 in our case (01=version, block.independence=1, block.checksum=0,
; size=0, checksum=1, reserved)
; MAX Blocksize: 40 (64kB)
; HEADER CHECKSUM: a7
; BLOCK HEADER: 4 bytes (le) If highest bit set, uncompressed!
; BLOCKS:
; Token byte. High 4-bits literal length, low 4-bits copy length
; + If literal length==15, then following byte gets added to length
; If that byte was 255, then keep adding bytes until not 255
; + The literal bytes follow. There may be zero of them
; + Next is block copy info. little-endian 2-byte offset to
; be subtracted from current output (write) position indicating source
; + The low 4-bits of the token are the copy length, which needs
; 4 added to it. As with the literal length, if it is 15 then
; you read a byte and add (and if that byte is 255, keep adding)
;LZ4_SRC EQU $00
;LZ4_DST EQU $02
;LZ4_END EQU $04
;COUNT EQU $06
;DELTA EQU $08
;UNPACK_BUFFER EQU $5E00 ; offset of first unpacked byte
;======================
; LZ4 decode setup
;======================
; Prepares the zero-page pointers that lz4_decode_step works from.
;
; In:	LZ4_SRC	= address of the packed data
;	LZ4_END	= size of the packed data in bytes
; Out:	LZ4_END	= LZ4_SRC + size (first address past the packed data)
;	LZ4_DST	= UNPACK_BUFFER (output location is fixed at assembly time)
; Clobbers: A, flags
lz4_decode_setup:
	clc
	lda	LZ4_END			; size (low) ...
	adc	LZ4_SRC			; ... plus start (low)
	sta	LZ4_END			; = end pointer (low)
	lda	LZ4_END+1		; size (high) ...
	adc	LZ4_SRC+1		; ... plus start (high) plus carry
	sta	LZ4_END+1		; = end pointer (high)

	lda	#>UNPACK_BUFFER		; output always starts at UNPACK_BUFFER
	sta	LZ4_DST+1
	lda	#<UNPACK_BUFFER
	sta	LZ4_DST
	rts
;==========================================
; lz4_decode_step
;==========================================
; Decodes exactly one LZ4 sequence (token, optional literal run, then one
; match copy) and returns, so the caller can spread decompression across
; several calls.
;
; In:	LZ4_SRC	= next packed byte to read
;	LZ4_DST	= next output byte to write
;	LZ4_END	= first address past the packed data (see lz4_decode_setup)
; Out:	carry clear = not finished, call again for the next sequence
;	carry set   = LZ4_SRC reached LZ4_END after the literals; finished
; Clobbers: A, X, Y (Y is 0 on exit), COUNT, COUNT+1, DELTA, DELTA+1
;
; Count convention shared with docopy: X holds the low byte of the length
; and COUNT+1 holds the number of passes (an X of 0 means a full 256-byte
; pass), so a length of N*256 needs X=0, COUNT+1=N while a length of
; N*256+r (r != 0) needs X=r, COUNT+1=N+1.
lz4_decode_step:
	ldy	#0			; used to index, always zero
parsetoken:
	jsr	getsrc			; get next token
	pha				; save for later (need bottom 4 bits)
	lsr				; number of literals is in top 4 bits
	lsr				; so shift into place
	lsr
	lsr
	beq	copymatches		; if zero, then no literals
					; jump ahead and copy

	jsr	buildcount		; add up all the literal sizes
					; result is in ram[count+1]-1:A
	tax				; now in ram[count+1]-1:X
	bne	literal_count_ok	; BUGFIX: a low byte of zero means the
					; length is an exact multiple of 0x100.
	dec	COUNT+1			; buildcount bumped COUNT+1 on the wrap
					; but docopy treats X=0 as 256, so undo
					; the bump or 256 extra bytes are copied.
					; (A is only 0 here after a wrap, so
					; COUNT+1 is at least 2 before the dec.)
literal_count_ok:
	jsr	docopy			; copy the literals

	lda	LZ4_SRC			; 16-bit compare of src against end
	cmp	LZ4_END			; carry = (src low >= end low)
	lda	LZ4_SRC+1
	sbc	LZ4_END+1		; carry = (src >= end)
	bcs	done			; reached the end, no match follows

copymatches:
	jsr	getsrc			; get 16-bit delta value (low byte first)
	sta	DELTA
	jsr	getsrc
	sta	DELTA+1
	pla				; restore token
	and	#$0f			; get bottom 4 bits
					; match count. 0 means 4
					; 15 means 19+, must be calculated
	jsr	buildcount		; add up count bytes, in ram[count+1]-1:A
	clc
	adc	#4			; adjust count by 4 (minmatch)
	tax				; now in ram[count+1]-1:X
	beq	copy_no_adjust		; BUGFIX, don't increment if
					; exactly a multiple of 0x100
	bcc	copy_no_adjust
	inc	COUNT+1			; increment if we overflowed
copy_no_adjust:
	lda	LZ4_SRC+1		; save packed-data pointer on stack;
	pha				; docopy reads through LZ4_SRC
	lda	LZ4_SRC
	pha

	sec				; match source = destination - delta
	lda	LZ4_DST
	sbc	DELTA
	sta	LZ4_SRC
	lda	LZ4_DST+1
	sbc	DELTA+1
	sta	LZ4_SRC+1

	jsr	docopy			; copy the match from earlier output

	pla				; restore the packed-data pointer
	sta	LZ4_SRC
	pla
	sta	LZ4_SRC+1

	clc				; c==0 means not done
	rts				; one sequence per call; the non-stepwise
					; decoder would jmp parsetoken here

done:
	pla				; discard the saved token
	sec				; c==1 means done
	rts
;=========
; getsrc
;=========
; Reads the byte at (LZ4_SRC),Y into A, then advances the 16-bit
; LZ4_SRC pointer by one. Callers in this file always enter with Y=0.
; X and Y are left untouched.
getsrc:
	lda	(LZ4_SRC),y		; fetch the byte
	inc	LZ4_SRC			; bump low byte of pointer
	bne	@same_page		; no wrap: high byte is unchanged
	inc	LZ4_SRC+1		; low byte wrapped to 0: carry into high
@same_page:
	rts
;============
; buildcount
;============
; Expands a 4-bit length nibble into a full LZ4 length.
;
; In:	A = 4-bit count taken from the token (0..15)
; Out:	A = low byte of the length
;	COUNT+1 = 1 + number of times the low byte wrapped past 255
;	X is clobbered; COUNT is used as scratch when the nibble is 15
;
; A nibble below 15 is the length itself. A nibble of 15 is followed by
; extension bytes that are summed onto it; an extension byte of 255 means
; another one follows.
buildcount:
	ldx	#1			; pass counter starts at 1, because
	stx	COUNT+1			; docopy finishes when it decrements to 0
	cmp	#$0f			; anything but 15 needs no extension bytes
	bne	@finished
@next_byte:
	sta	COUNT			; stash running low byte of the sum
	jsr	getsrc			; fetch an extension byte
	tax				; remember it for the 255 test below
	clc
	adc	COUNT			; running sum += extension byte
	bcc	@no_wrap		; low byte did not wrap
	inc	COUNT+1			; wrapped past 255: one more pass
@no_wrap:
	inx				; extension byte 255 becomes 0 here
	beq	@next_byte		; 255 means keep reading and adding
@finished:
	rts
;============
; getput
;============
; Copies one byte: reads it with getsrc (advancing LZ4_SRC), then drops
; straight into putdst to write it (advancing LZ4_DST).
getput:
	jsr	getsrc
	; no rts: execution continues into putdst below

;=============
; putdst
;=============
; Writes A to (LZ4_DST),Y, then advances the 16-bit LZ4_DST pointer
; by one. A, X and Y are left untouched.
putdst:
	sta	(LZ4_DST),y		; write the byte
	inc	LZ4_DST			; bump low byte of pointer
	bne	@same_page		; no wrap: high byte is unchanged
	inc	LZ4_DST+1		; low byte wrapped to 0: carry into high
@same_page:
	rts
;=============================
; docopy
;=============================
; Copies a run of bytes from (LZ4_SRC) to (LZ4_DST) via getput.
;
; In:	X	= bytes in the first pass (0 is treated as 256, because
;		  the byte is copied before X is decremented and tested)
;	COUNT+1	= total number of passes; every pass after the first
;		  copies 256 bytes
; Out:	X = 0, COUNT+1 = 0, LZ4_SRC and LZ4_DST advanced past the run
docopy:
@copy_byte:
	jsr	getput			; move one byte, advance both pointers
	dex				; one fewer byte left in this pass
	bne	@copy_byte
	dec	COUNT+1			; pass finished; any passes left?
	bne	@copy_byte		; yes: X is 0 so the next pass is 256 bytes
	rts

View File

@ -32,7 +32,7 @@ CHIPTUNE_PLAYER: chiptune_player.o
chiptune_player.o: chiptune_player.s \
../asm_routines/mockingboard.s \
../asm_routines/dos33_routines.s \
../asm_routines/lz4_decode.s \
../asm_routines/lz4_decode_step.s \
rasterbars.s volume_bars.s interrupt_handler.s \
chip_title.inc zp.inc
ca65 -o chiptune_player.o chiptune_player.s -l chiptune_player.lst

View File

@ -2,3 +2,5 @@
+ Right/Left arrows on screen
+ Keyboard, Right/Left/Pause
+ Calculate maximum decode time for songs
+ Put graphics update in interrupt routine, put debug in normal space

Binary file not shown.

View File

@ -293,12 +293,9 @@ read_size EQU $4000
adc #0
sta LZ4_SRC+1
; Fall through to next_subsong
;=================
; next sub-song
;=================
next_subsong:
; Decompress first chunks
lda #$0
sta COPY_OFFSET
lda #$3
@ -306,6 +303,27 @@ next_subsong:
lda #$20
sta DECODER_STATE
jsr setup_next_subsong
jsr lz4_decode_setup
our_lz4_loop:
jsr lz4_decode_step
bcc our_lz4_loop
rts
;=================
; next sub-song
;=================
setup_next_subsong:
; lda #$0
; sta COPY_OFFSET
; lda #$3
; sta CHUNKSIZE
; lda #$20
; sta DECODER_STATE
ldy #0
lda (LZ4_SRC),Y ; get next size value
@ -322,7 +340,7 @@ next_subsong:
adc #0
sta LZ4_SRC+1
jsr lz4_decode ; decode
; jsr lz4_decode ; decode
; tail-call?
@ -430,7 +448,7 @@ krw_file:
.include "../asm_routines/gr_setpage.s"
.include "../asm_routines/dos33_routines.s"
.include "../asm_routines/gr_hlin.s"
.include "../asm_routines/lz4_decode.s"
.include "../asm_routines/lz4_decode_step.s"
.include "rasterbars.s"
.include "volume_bars.s"
.include "interrupt_handler.s"

View File

@ -166,7 +166,12 @@ handle_copy:
jmp increment_offset ; 3
decompress_step:
; TODO
lda LZ4_DONE
bne increment_offset
jsr lz4_decode_step
bcc increment_offset
inc LZ4_DONE
;==============================================
; increment offset. If 0 move to next chunk
@ -193,13 +198,22 @@ increment_offset:
start_c:
lda #1
sta CHUNKSIZE
; setup next three chunks of song
jsr setup_next_subsong ; and decompress next ; 6
jsr lz4_decode_setup
lda #0
sta LZ4_DONE
jmp back_to_first_reg
wraparound_to_a:
; setup next three chunks of song
jsr next_subsong ; and decompress next ; 6
lda #$0
sta COPY_OFFSET
lda #$3
sta CHUNKSIZE
lda #$20
sta DECODER_STATE
;==============================
; After 14th reg, reset back to
@ -224,12 +238,12 @@ back_to_first_reg_a:
back_to_first_reg_c:
lda #>(UNPACK_BUFFER+$2A00) ; in proper chunk (1 of 3) ; 2
update_r0_pointer:
sta INH ; update r0 pointer ; 3
;============
; 18
update_r0_pointer:
sta INH ; update r0 pointer ; 3
;=================================
; Finally done with this interrupt

View File

@ -1,11 +1,10 @@
.define EQU =
LZ4_SRC EQU $00
LZ4_DST EQU $02
LZ4_END EQU $04
COUNT EQU $06
DELTA EQU $08
LZ4_SRC EQU $00
LZ4_DST EQU $02
LZ4_END EQU $04
COUNT EQU $06
DELTA EQU $08
;; Zero page monitor routines addresses
@ -85,6 +84,7 @@ DECODER_STATE EQU $7F
;NUM1L EQU $7E
;NUM1H EQU $7F
CHUNKSIZE EQU $80
LZ4_DONE EQU $81
A_COLOR EQU $83
B_COLOR EQU $84

View File

@ -109,6 +109,7 @@ int main(int argc, char **argv) {
FILE *fff;
int size;
short orgoff,paksize,pakoff;
int token_count=0;
init_6502();
@ -186,7 +187,8 @@ int main(int argc, char **argv) {
y=0; // used for offset //ldy #0
parsetoken:
printf("LOAD TOKEN: ");
token_count++;
printf("LOAD TOKEN %d: ",token_count);
getsrc(); // jsr getsrc
// get token
pha(); // save for later // pha
@ -290,6 +292,8 @@ done:
printf("Out size=%d\n",out_size);
printf("Total tokens: %d\n",token_count);
fwrite(&ram[ORGOFFSET],1,out_size,fff);
fclose(fff);