mirror of
https://github.com/deater/dos33fsprogs.git
synced 2024-09-09 02:54:25 +00:00
lzsa: update benchmarks
This commit is contained in:
parent
8d654dce84
commit
012d449534
@ -8,10 +8,15 @@ B2D = ../bmp2dhr/b2d
|
||||
|
||||
all: gr_lzsa.dsk
|
||||
|
||||
gr_lzsa.dsk: HELLO GR_LZSA
|
||||
cp empty.dsk mist.dsk
|
||||
$(DOS33) -y mist.dsk SAVE A HELLO
|
||||
$(DOS33) -y mist.dsk BSAVE -a 0x1000 GR_LZSA
|
||||
gr_lzsa.dsk: HELLO GR_LZSA LZSA2_SMALL_BENCH LZSA2_FAST_BENCH \
|
||||
LZ4_BENCH RLE_BENCH
|
||||
cp empty.dsk lzsa_test.dsk
|
||||
$(DOS33) -y lzsa_test.dsk SAVE A HELLO
|
||||
$(DOS33) -y lzsa_test.dsk BSAVE -a 0x1000 GR_LZSA
|
||||
$(DOS33) -y lzsa_test.dsk BSAVE -a 0x1000 LZSA2_SMALL_BENCH
|
||||
$(DOS33) -y lzsa_test.dsk BSAVE -a 0x1000 LZSA2_FAST_BENCH
|
||||
$(DOS33) -y lzsa_test.dsk BSAVE -a 0x1000 RLE_BENCH
|
||||
$(DOS33) -y lzsa_test.dsk BSAVE -a 0x1000 LZ4_BENCH
|
||||
|
||||
###
|
||||
|
||||
@ -21,6 +26,46 @@ GR_LZSA: gr_lzsa.o
|
||||
gr_lzsa.o: gr_lzsa.s decompress_small_v2.s
|
||||
ca65 -o gr_lzsa.o gr_lzsa.s -l gr_lzsa.lst
|
||||
|
||||
###
|
||||
|
||||
LZSA2_SMALL_BENCH: lzsa2_small_bench.o
|
||||
ld65 -o LZSA2_SMALL_BENCH lzsa2_small_bench.o -C ../linker_scripts/apple2_1000.inc
|
||||
|
||||
lzsa2_small_bench.o: lzsa2_small_bench.s decompress_small_v2.s
|
||||
ca65 -o lzsa2_small_bench.o lzsa2_small_bench.s -l lzsa2_small_bench.lst
|
||||
|
||||
###
|
||||
|
||||
LZSA2_FAST_BENCH: lzsa2_fast_bench.o
|
||||
ld65 -o LZSA2_FAST_BENCH lzsa2_fast_bench.o -C ../linker_scripts/apple2_1000.inc
|
||||
|
||||
lzsa2_fast_bench.o: lzsa2_fast_bench.s decompress_fast_v2.s
|
||||
ca65 -o lzsa2_fast_bench.o lzsa2_fast_bench.s -l lzsa2_fast_bench.lst
|
||||
|
||||
###
|
||||
|
||||
RLE_BENCH: rle_bench.o
|
||||
ld65 -o RLE_BENCH rle_bench.o -C ../linker_scripts/apple2_1000.inc
|
||||
|
||||
rle_bench.o: rle_bench.s gr_unrle.s
|
||||
ca65 -o rle_bench.o rle_bench.s -l rle_bench.lst
|
||||
|
||||
###
|
||||
|
||||
LZ4_BENCH: lz4_bench.o
|
||||
ld65 -o LZ4_BENCH lz4_bench.o -C ../linker_scripts/apple2_1000.inc
|
||||
|
||||
lz4_bench.o: lz4_bench.s lz4_decode.s spaceship_far_n.lz4
|
||||
ca65 -o lz4_bench.o lz4_bench.s -l lz4_bench.lst
|
||||
|
||||
###
|
||||
|
||||
|
||||
spaceship_far_n.lz4:
|
||||
lz4 -f -16 spaceship_far_n.gr
|
||||
dd if=spaceship_far_n.gr.lz4 of=spaceship_far_n.lz4 bs=1 skip=11
|
||||
truncate spaceship_far_n.lz4 -s -8
|
||||
|
||||
|
||||
###
|
||||
|
||||
@ -30,4 +75,5 @@ HELLO: hello.bas
|
||||
####
|
||||
|
||||
clean:
|
||||
rm -f *~ *.o *.lst HELLO GR_LZSA
|
||||
rm -f *~ *.o *.lst HELLO GR_LZSA RLE_BENCH LZSA2_FAST_BENCH LZSA2_SMALL_BENCH LZ4_BENCH
|
||||
|
||||
|
367
lzsa_test/decompress_fast_v2.s
Normal file
367
lzsa_test/decompress_fast_v2.s
Normal file
@ -0,0 +1,367 @@
|
||||
; note -- modified by Vince Weaver to assemble with ca65
|
||||
|
||||
; -----------------------------------------------------------------------------
|
||||
; Decompress raw LZSA2 block.
|
||||
; Create one with lzsa -r -f2 <original_file> <compressed_file>
|
||||
;
|
||||
; in:
|
||||
; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
|
||||
; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
|
||||
;
|
||||
; out:
|
||||
; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
|
||||
;
|
||||
; -----------------------------------------------------------------------------
|
||||
; Backward decompression is also supported, use lzsa -r -b -f2 <original_file> <compressed_file>
|
||||
; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
|
||||
;
|
||||
; in:
|
||||
; * LZSA_SRC_LO/LZSA_SRC_HI must contain the address of the last byte of compressed data
|
||||
; * LZSA_DST_LO/LZSA_DST_HI must contain the address of the last byte of the destination buffer
|
||||
;
|
||||
; out:
|
||||
; * LZSA_DST_LO/LZSA_DST_HI contain the last decompressed byte address, -1
|
||||
;
|
||||
; -----------------------------------------------------------------------------
|
||||
;
|
||||
; Copyright (C) 2019 Emmanuel Marty, Peter Ferrie
|
||||
;
|
||||
; This software is provided 'as-is', without any express or implied
|
||||
; warranty. In no event will the authors be held liable for any damages
|
||||
; arising from the use of this software.
|
||||
;
|
||||
; Permission is granted to anyone to use this software for any purpose,
|
||||
; including commercial applications, and to alter it and redistribute it
|
||||
; freely, subject to the following restrictions:
|
||||
;
|
||||
; 1. The origin of this software must not be misrepresented; you must not
|
||||
; claim that you wrote the original software. If you use this software
|
||||
; in a product, an acknowledgment in the product documentation would be
|
||||
; appreciated but is not required.
|
||||
; 2. Altered source versions must be plainly marked as such, and must not be
|
||||
; misrepresented as being the original software.
|
||||
; 3. This notice may not be removed or altered from any source distribution.
|
||||
; -----------------------------------------------------------------------------
|
||||
|
||||
;NIBCOUNT = $FC ; zero-page location for temp offset
|
||||
|
||||
; Entry point of the ca65 variant.
; In:  A = high byte (page) of the destination buffer.
; The destination low byte and NIBCOUNT are both reset to 0 here, so any
; LZSA_DST_LO/LZSA_DST_HI value stored by the caller beforehand is
; overwritten and output always starts on a page boundary.
; NOTE(review): the source pointer is not written here, so it presumably
; must be set up by the caller before the call -- confirm against callers.
decompress_lzsa2_fast:
|
||||
|
||||
sta LZSA_DST_HI	; destination page is passed in A
|
||||
|
||||
ldy #$00	; Y = 0: used for the two stores below
|
||||
sty LZSA_DST_LO	; destination starts at offset 0 in the page
|
||||
sty NIBCOUNT	; no nibble buffered yet
|
||||
|
||||
; Read the next token and work out the literal run length.
; The token is kept on the stack for the match decoding that follows.
; Convention used on the way to prepare_copy_literals_direct:
;   carry clear -> A already holds the complete literal count
;   carry set   -> the count is large (handled by prepare_copy_literals_large)
decode_token:
|
||||
jsr getsrc ; read token byte: XYZ|LL|MMM
|
||||
pha ; preserve token on stack
|
||||
|
||||
and #$18 ; isolate literals count (LL)
|
||||
beq no_literals ; skip if no literals to copy
|
||||
cmp #$18 ; LITERALS_RUN_LEN_V2?
|
||||
bcc prepare_copy_literals ; if less, count is directly embedded in token
|
||||
|
||||
jsr getnibble ; get extra literals length nibble
|
||||
; add nibble to len from token
|
||||
adc #$02 ; (LITERALS_RUN_LEN_V2) minus carry; getnibble returns with carry set
|
||||
cmp #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
|
||||
bcc prepare_copy_literals_direct ; if less, literals count is complete
|
||||
|
||||
jsr getsrc ; get extra byte of variable literals count
|
||||
; the carry is always set by the CMP above
|
||||
; GETSRC doesn't change it
|
||||
sbc #$EE ; A = byte - $EE (same as byte + 18 mod 256); carry stays set only when byte >= $EE
|
||||
jmp prepare_copy_literals_direct
|
||||
|
||||
; Literal copy.  The count is held in Y:X as an outer/inner loop pair:
; X = low byte of the count, Y = number of outer passes.  When the low
; byte is non-zero one extra outer pass is needed (the INY below); when
; it is zero the inner loop wraps through 256 iterations per pass.
prepare_copy_literals_large:
|
||||
; handle 16 bits literals count
|
||||
; literals count = directly these 16 bits
|
||||
jsr getlargesrc ; grab low 8 bits in X, high 8 bits in A
|
||||
tay ; put high 8 bits in Y
|
||||
bcs prepare_copy_literals_high ; (*same as JMP PREPARE_COPY_LITERALS_HIGH but shorter)
|
||||
|
||||
prepare_copy_literals:
|
||||
lsr ; shift literals count into place
|
||||
lsr
|
||||
lsr	; LL was in bits 4-3, so the bits shifted out are 0 and carry ends up clear
|
||||
|
||||
prepare_copy_literals_direct:
|
||||
tax	; X = low byte of literal count
|
||||
bcs prepare_copy_literals_large ; if so, literals count is large
|
||||
|
||||
prepare_copy_literals_high:
|
||||
txa	; test low byte of count
|
||||
beq copy_literals	; exact multiple of 256: no extra pass
|
||||
iny	; partial page: one more outer pass
|
||||
|
||||
copy_literals:
|
||||
jsr getput ; copy one byte of literals
|
||||
dex
|
||||
bne copy_literals
|
||||
dey
|
||||
bne copy_literals
|
||||
|
||||
no_literals:
|
||||
pla ; retrieve token from stack
|
||||
pha ; preserve token again
|
||||
asl
|
||||
bcs repmatch_or_large_offset ; 1YZ: rep-match or 13/16 bit offset
|
||||
|
||||
asl ; 0YZ: 5 or 9 bit offset
|
||||
bcs offset_9_bit
|
||||
|
||||
; 00Z: 5 bit offset
|
||||
|
||||
ldx #$FF ; set offset bits 15-8 to 1
|
||||
|
||||
jsr getcombinedbits ; rotate Z bit into bit 0, read nibble for bits 4-1
|
||||
ora #$E0 ; set bits 7-5 to 1
|
||||
bne got_offset_lo ; go store low byte of match offset and prepare match
|
||||
|
||||
offset_9_bit: ; 01Z: 9 bit offset
|
||||
;;asl ; shift Z (offset bit 8) in place
|
||||
rol
|
||||
rol
|
||||
and #$01
|
||||
eor #$FF ; set offset bits 15-9 to 1
|
||||
bne got_offset_hi ; go store high byte, read low byte of match offset and prepare match
|
||||
; (*same as JMP GOT_OFFSET_HI but shorter)
|
||||
|
||||
; Token has X=1: either a 13-bit offset (10Z), a 16-bit offset (110)
; or a repeat of the previous offset (111).
repmatch_or_large_offset:
|
||||
asl ; 13 bit offset?
|
||||
bcs repmatch_or_16bit ; handle rep-match or 16-bit offset if not
|
||||
|
||||
; 10Z: 13 bit offset
|
||||
|
||||
jsr getcombinedbits ; rotate Z bit into bit 8, read nibble for bits 12-9
|
||||
adc #$DE ; set bits 15-13 to 1 and subtract 2 (to subtract 512); carry is clear on return from getcombinedbits
|
||||
bne got_offset_hi ; go store high byte, read low byte of match offset and prepare match
|
||||
; (*same as JMP GOT_OFFSET_HI but shorter)
|
||||
|
||||
repmatch_or_16bit: ; rep-match or 16 bit offset
|
||||
;;ASL ; XYZ=111?
|
||||
bmi rep_match ; reuse previous offset if so (rep-match)
|
||||
|
||||
; 110: handle 16 bit offset
|
||||
jsr getsrc ; grab high 8 bits
|
||||
got_offset_hi:
|
||||
tax
|
||||
jsr getsrc ; grab low 8 bits
|
||||
got_offset_lo:
|
||||
sta OFFSLO ; store low byte of match offset
|
||||
stx OFFSHI ; store high byte of match offset
|
||||
|
||||
; Compute the back-reference address and decode the match length.
; OFFSLO/OFFSHI are the immediate operand bytes of the instructions
; below; they are patched at got_offset_lo and keep their value when a
; rep-match token branches straight here.
; The result is written into the operand of the LDA at copy_match_loop.
rep_match:
|
||||
.ifdef BACKWARD_DECOMPRESS
|
||||
|
||||
; Backward decompression - subtract match offset
|
||||
|
||||
sec ; dest - match offset
|
||||
lda putdst+1 ; low 8 bits
|
||||
OFFSLO = *+1	; operand byte of the SBC below
|
||||
sbc #$AA
|
||||
sta copy_match_loop+1 ; store back reference address
|
||||
lda putdst+2
|
||||
OFFSHI = *+1	; operand byte of the SBC below
|
||||
sbc #$AA ; high 8 bits
|
||||
sta copy_match_loop+2 ; store high 8 bits of address
|
||||
sec	; force carry set: the ADC #$01 below counts on carry to add MIN_MATCH_SIZE_V2
|
||||
|
||||
.else
|
||||
|
||||
; Forward decompression - add match offset
|
||||
|
||||
clc ; add dest + match offset
|
||||
lda putdst+1 ; low 8 bits
|
||||
OFFSLO = *+1	; operand byte of the ADC below
|
||||
adc #$AA
|
||||
sta copy_match_loop+1 ; store back reference address
|
||||
OFFSHI = *+1	; operand byte of the LDA below
|
||||
lda #$AA ; high 8 bits
|
||||
adc putdst+2
|
||||
sta copy_match_loop+2 ; store high 8 bits of address
|
||||
.endif
|
||||
|
||||
pla ; retrieve token from stack again
|
||||
and #$07 ; isolate match len (MMM)
|
||||
adc #$01 ; add MIN_MATCH_SIZE_V2 and carry
|
||||
cmp #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
|
||||
bcc prepare_copy_match ; if less, length is directly embedded in token
|
||||
|
||||
jsr getnibble ; get extra match length nibble
|
||||
; add nibble to len from token
|
||||
adc #$08 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2) minus carry
|
||||
cmp #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
|
||||
bcc prepare_copy_match ; if less, match length is complete
|
||||
|
||||
jsr getsrc ; get extra byte of variable match length
|
||||
; the carry is always set by the CMP above
|
||||
; GETSRC doesn't change it
|
||||
sbc #$E8 ; A = byte - $E8; carry stays set only when byte >= $E8 (tested at prepare_copy_match)
|
||||
|
||||
prepare_copy_match:
|
||||
tax
|
||||
bcc prepare_copy_match_y ; if not, the match length is complete
|
||||
beq decompression_done ; if EOD code, bail
|
||||
|
||||
; Handle 16 bits match length
|
||||
jsr getlargesrc ; grab low 8 bits in X, high 8 bits in A
|
||||
tay ; put high 8 bits in Y
|
||||
|
||||
prepare_copy_match_y:
|
||||
txa
|
||||
beq copy_match_loop
|
||||
iny
|
||||
|
||||
copy_match_loop:
|
||||
lda $AAAA ; get one byte of backreference
|
||||
jsr putdst ; copy to destination
|
||||
|
||||
.ifdef BACKWARD_DECOMPRESS
|
||||
|
||||
; Backward decompression -- put backreference bytes backward
|
||||
|
||||
lda copy_match_loop+1
|
||||
beq getmatch_adj_hi
|
||||
getmatch_done:
|
||||
dec copy_match_loop+1
|
||||
|
||||
.else
|
||||
|
||||
; Forward decompression -- put backreference bytes forward
|
||||
|
||||
inc copy_match_loop+1
|
||||
beq getmatch_adj_hi
|
||||
getmatch_done:
|
||||
|
||||
.endif
|
||||
|
||||
dex
|
||||
bne copy_match_loop
|
||||
dey
|
||||
bne copy_match_loop
|
||||
jmp decode_token
|
||||
|
||||
.ifdef BACKWARD_DECOMPRESS
|
||||
|
||||
getmatch_adj_hi:
|
||||
dec copy_match_loop+2
|
||||
jmp getmatch_done
|
||||
|
||||
.else
|
||||
|
||||
getmatch_adj_hi:
|
||||
inc copy_match_loop+2
|
||||
jmp getmatch_done
|
||||
.endif
|
||||
|
||||
; Combine the token's Z bit with a freshly read nibble.
; In:  A = shifted token with the Z bit in bit 7.
; Out: A = (nibble << 1) | (inverted Z), carry clear.
; The final RTS doubles as the exit of the whole decompressor
; (label decompression_done).
getcombinedbits:
|
||||
eor #$80	; invert the Z bit
|
||||
asl	; move inverted Z into carry
|
||||
php	; keep that carry across the getnibble call
|
||||
|
||||
jsr getnibble ; get nibble into bits 0-3 (for offset bits 1-4)
|
||||
plp ; merge Z bit as the carry bit (for offset bit 0)
|
||||
combinedbitz:
|
||||
rol ; nibble -> bits 1-4; carry(!Z bit) -> bit 0 ; carry cleared
|
||||
decompression_done:
|
||||
rts
|
||||
|
||||
; Return the next 4-bit value of the nibble stream.
; Out: A = nibble in bits 0-3 (bits 4-7 clear), carry set on both paths.
; NIBCOUNT bit 0 tells whether a low nibble is still buffered; the
; buffered byte lives in the operand of the LDA # below (label NIBBLES).
getnibble:
|
||||
NIBBLES = *+1	; operand byte of the LDA below, rewritten by need_nibbles
|
||||
lda #$AA
|
||||
lsr NIBCOUNT	; carry = "low nibble still pending"; flag is cleared by the shift
|
||||
bcc need_nibbles
|
||||
and #$0F ; isolate low 4 bits of nibble
|
||||
rts
|
||||
|
||||
need_nibbles:
|
||||
inc NIBCOUNT	; mark the low nibble of the new byte as pending
|
||||
jsr getsrc ; get 2 nibbles
|
||||
sta NIBBLES	; buffer the byte for the next call
|
||||
lsr	; return the high nibble first
|
||||
lsr
|
||||
lsr
|
||||
lsr
|
||||
sec	; callers rely on carry being set on return
|
||||
rts
|
||||
|
||||
.ifdef BACKWARD_DECOMPRESS
|
||||
|
||||
; Backward decompression -- get and put bytes backward
|
||||
|
||||
getput:
|
||||
jsr getsrc
|
||||
putdst:
|
||||
LZSA_DST_LO = *+1
|
||||
LZSA_DST_HI = *+2
|
||||
sta $AAAA
|
||||
lda putdst+1
|
||||
beq putdst_adj_hi
|
||||
dec putdst+1
|
||||
rts
|
||||
|
||||
putdst_adj_hi:
|
||||
dec putdst+2
|
||||
dec putdst+1
|
||||
rts
|
||||
|
||||
getlargesrc:
|
||||
jsr getsrc ; grab low 8 bits
|
||||
tax ; move to X
|
||||
; fall through grab high 8 bits
|
||||
|
||||
getsrc:
|
||||
LZSA_SRC_LO = *+1
|
||||
LZSA_SRC_HI = *+2
|
||||
lda $AAAA
|
||||
pha
|
||||
lda getsrc+1
|
||||
beq getsrc_adj_hi
|
||||
dec getsrc+1
|
||||
pla
|
||||
rts
|
||||
|
||||
getsrc_adj_hi:
|
||||
dec getsrc+2
|
||||
dec getsrc+1
|
||||
pla
|
||||
rts
|
||||
|
||||
.else
|
||||
|
||||
; Forward decompression -- get and put bytes forward
|
||||
|
||||
; getput: copy one byte from the source stream to the destination.
; putdst: store A at the destination and advance the destination by one.
; The destination pointer is the operand of the STA below
; (LZSA_DST_LO / LZSA_DST_HI), so the code modifies itself.
; Carry is not changed by either routine.
getput:
|
||||
jsr getsrc	; A = next source byte, then fall into putdst
|
||||
putdst:
|
||||
LZSA_DST_LO = *+1	; low byte of the STA operand
|
||||
LZSA_DST_HI = *+2	; high byte of the STA operand
|
||||
sta $AAAA
|
||||
inc putdst+1	; advance destination low byte
|
||||
beq putdst_adj_hi	; wrapped to 0: carry into the high byte
|
||||
rts
|
||||
|
||||
putdst_adj_hi:
|
||||
inc putdst+2
|
||||
rts
|
||||
|
||||
; getlargesrc: read two source bytes; first one ends up in X, second in A.
; getsrc:      read one source byte into A and advance the source pointer.
; The source pointer is the operand of the LDA below
; (LZSA_SRC_LO / LZSA_SRC_HI), so the code modifies itself.
; Carry is not changed; Z/N on return reflect the pointer increment,
; not the byte that was loaded.
getlargesrc:
|
||||
jsr getsrc ; grab low 8 bits
|
||||
tax ; move to X
|
||||
; fall through grab high 8 bits
|
||||
|
||||
getsrc:
|
||||
LZSA_SRC_LO = *+1	; low byte of the LDA operand
|
||||
LZSA_SRC_HI = *+2	; high byte of the LDA operand
|
||||
lda $AAAA
|
||||
inc getsrc+1	; advance source low byte
|
||||
beq getsrc_adj_hi	; wrapped to 0: carry into the high byte
|
||||
rts
|
||||
|
||||
getsrc_adj_hi:
|
||||
inc getsrc+2
|
||||
rts
|
||||
.endif
|
||||
|
637
lzsa_test/decompress_faster_v2.s
Normal file
637
lzsa_test/decompress_faster_v2.s
Normal file
@ -0,0 +1,637 @@
|
||||
; ***************************************************************************
|
||||
; ***************************************************************************
|
||||
;
|
||||
; lzsa2_6502.s
|
||||
;
|
||||
; NMOS 6502 decompressor for data stored in Emmanuel Marty's LZSA2 format.
|
||||
;
|
||||
; This code is written for the ACME assembler.
|
||||
;
|
||||
; Optional code is presented for two minor 6502 optimizations that break
|
||||
; compatibility with the current LZSA2 format standard.
|
||||
;
|
||||
; The code is 241 bytes for the small version, and 267 bytes for the normal.
|
||||
;
|
||||
; Copyright John Brandwood 2019.
|
||||
;
|
||||
; Distributed under the Boost Software License, Version 1.0.
|
||||
; (See accompanying file LICENSE_1_0.txt or copy at
|
||||
; http://www.boost.org/LICENSE_1_0.txt)
|
||||
;
|
||||
; ***************************************************************************
|
||||
; ***************************************************************************
|
||||
|
||||
|
||||
|
||||
; ***************************************************************************
|
||||
; ***************************************************************************
|
||||
;
|
||||
; Decompression Options & Macros
|
||||
;
|
||||
|
||||
;
|
||||
; Save 7 bytes of code, and 21 cycles every time that a
|
||||
; 16-bit length is decoded?
|
||||
;
|
||||
; N.B. Setting this breaks compatibility with LZSA v1.2
|
||||
;
|
||||
|
||||
LZSA_SWAP_LEN16 = 0
|
||||
|
||||
;
|
||||
; Save 3 bytes of code, and 4 or 8 cycles when decoding
|
||||
; an offset?
|
||||
;
|
||||
; N.B. Setting this breaks compatibility with LZSA v1.2
|
||||
;
|
||||
|
||||
LZSA_SWAP_XZY = 0
|
||||
|
||||
;
|
||||
; Choose size over speed (within sane limits)?
|
||||
;
|
||||
|
||||
LZSA_SMALL_SIZE = 0
|
||||
|
||||
;
|
||||
; Remove code inlining to save space?
|
||||
;
|
||||
; This saves 15 bytes of code at the cost of 7% speed.
|
||||
;
|
||||
|
||||
!if LZSA_SMALL_SIZE {
|
||||
LZSA_NO_INLINE = 1
|
||||
} else {
|
||||
LZSA_NO_INLINE = 0
|
||||
}
|
||||
|
||||
;
|
||||
; Use smaller code for copying literals?
|
||||
;
|
||||
; This saves 11 bytes of code at the cost of 5% speed.
|
||||
;
|
||||
|
||||
!if LZSA_SMALL_SIZE {
|
||||
LZSA_SHORT_CP = 1
|
||||
} else {
|
||||
LZSA_SHORT_CP = 0
|
||||
}
|
||||
|
||||
;
|
||||
; Assume that we're decompressing from a large multi-bank
|
||||
; compressed data file, and that the next bank may need to
|
||||
; paged in when a page-boundary is crossed.
|
||||
;
|
||||
|
||||
LZSA_FROM_BANK = 0
|
||||
|
||||
;
|
||||
; We will read from or write to $FFFF. This prevents the
|
||||
; use of the "INC ptrhi / BNE" trick and reduces speed.
|
||||
;
|
||||
|
||||
LZSA_USE_FFFF = 0
|
||||
|
||||
;
|
||||
; Macro to increment the source pointer to the next page.
|
||||
;
|
||||
|
||||
!if LZSA_FROM_BANK {
|
||||
!macro LZSA_INC_PAGE {
|
||||
jsr lzsa2_next_page
|
||||
}
|
||||
} else {
|
||||
!macro LZSA_INC_PAGE {
|
||||
inc <lzsa_srcptr + 1
|
||||
}
|
||||
}
|
||||
|
||||
;
|
||||
; Macro to read a byte from the compressed source data.
|
||||
;
|
||||
|
||||
!if LZSA_NO_INLINE {
|
||||
|
||||
!macro LZSA_GET_SRC {
|
||||
jsr lzsa2_get_byte
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
!macro LZSA_GET_SRC {
|
||||
lda (lzsa_srcptr),y
|
||||
inc <lzsa_srcptr + 0
|
||||
bne .skip
|
||||
+LZSA_INC_PAGE
|
||||
.skip:
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
;
|
||||
; Macro to speed up reading 50% of nibbles.
|
||||
;
|
||||
; This seems to save very few cycles compared to the
|
||||
; increase in code size, and it isn't recommended.
|
||||
;
|
||||
|
||||
LZSA_SLOW_NIBL = 1
|
||||
|
||||
!if (LZSA_SLOW_NIBL + LZSA_SMALL_SIZE) {
|
||||
|
||||
!macro LZSA_GET_NIBL {
|
||||
jsr lzsa2_get_nibble ; Always call a function.
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
!macro LZSA_GET_NIBL {
|
||||
lsr <lzsa_nibflg ; Is there a nibble waiting?
|
||||
lda <lzsa_nibble ; Extract the lo-nibble.
|
||||
bcs .skip
|
||||
jsr lzsa2_new_nibble ; Extract the hi-nibble.
|
||||
.skip: ora #$F0
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
; ***************************************************************************
|
||||
; ***************************************************************************
|
||||
;
|
||||
; Data usage is last 11 bytes of zero-page.
|
||||
;
|
||||
|
||||
lzsa_cmdbuf = $F5 ; 1 byte.
|
||||
lzsa_nibflg = $F6 ; 1 byte.
|
||||
lzsa_nibble = $F7 ; 1 byte.
|
||||
lzsa_offset = $F8 ; 1 word.
|
||||
lzsa_winptr = $FA ; 1 word.
|
||||
lzsa_srcptr = $FC ; 1 word.
|
||||
lzsa_dstptr = $FE ; 1 word.
|
||||
|
||||
lzsa_length = lzsa_winptr ; 1 word.
|
||||
|
||||
LZSA_SRC_LO = $FC
|
||||
LZSA_SRC_HI = $FD
|
||||
LZSA_DST_LO = $FE
|
||||
LZSA_DST_HI = $FF
|
||||
|
||||
|
||||
|
||||
; ***************************************************************************
|
||||
; ***************************************************************************
|
||||
;
|
||||
; lzsa2_unpack - Decompress data stored in Emmanuel Marty's LZSA2 format.
|
||||
;
|
||||
; Args: lzsa_srcptr = ptr to compressed data
|
||||
; Args: lzsa_dstptr = ptr to output buffer
|
||||
; Uses: lots!
|
||||
;
|
||||
; If compiled with LZSA_FROM_BANK, then lzsa_srcptr should be within the bank
|
||||
; window range.
|
||||
;
|
||||
|
||||
DECOMPRESS_LZSA2_FAST:
|
||||
lzsa2_unpack: ldy #0 ; Initialize source index.
|
||||
sty <lzsa_nibflg ; Initialize nibble buffer.
|
||||
|
||||
!if (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
|
||||
|
||||
beq .cp_length ; always taken
|
||||
.incsrc1:
|
||||
inc <lzsa_srcptr + 1
|
||||
bne .resume_src1 ; always taken
|
||||
|
||||
!if LZSA_SHORT_CP {
|
||||
.incsrc2:
|
||||
inc <lzsa_srcptr + 1
|
||||
bne .resume_src2 ; always taken
|
||||
|
||||
.incdst:
|
||||
inc <lzsa_dstptr + 1
|
||||
bne .resume_dst ; always taken
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
;
|
||||
; Copy bytes from compressed source data.
|
||||
;
|
||||
|
||||
.cp_length: ldx #$00 ; Hi-byte of length or offset.
|
||||
|
||||
!if (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) {
|
||||
|
||||
+LZSA_GET_SRC
|
||||
|
||||
} else {
|
||||
|
||||
lda (lzsa_srcptr),y
|
||||
inc <lzsa_srcptr + 0
|
||||
beq .incsrc1
|
||||
|
||||
}
|
||||
|
||||
.resume_src1:
|
||||
sta <lzsa_cmdbuf ; Preserve this for later.
|
||||
and #$18 ; Extract literal length.
|
||||
beq .lz_offset ; Skip directly to match?
|
||||
|
||||
lsr ; Get 2-bit literal length.
|
||||
lsr
|
||||
lsr
|
||||
cmp #$03 ; Extended length?
|
||||
bne .got_cp_len
|
||||
|
||||
jsr .get_length ; X=0 table index for literals.
|
||||
|
||||
!if LZSA_SHORT_CP {
|
||||
|
||||
.got_cp_len: cmp #0 ; Check the lo-byte of length.
|
||||
beq .put_cp_len
|
||||
|
||||
inx ; Increment # of pages to copy.
|
||||
|
||||
.put_cp_len: stx <lzsa_length
|
||||
tax
|
||||
|
||||
.cp_page: lda (lzsa_srcptr),y
|
||||
sta (lzsa_dstptr),y
|
||||
inc <lzsa_srcptr + 0
|
||||
|
||||
!if (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) {
|
||||
|
||||
bne .skip1
|
||||
inc <lzsa_srcptr + 1
|
||||
.skip1: inc <lzsa_dstptr + 0
|
||||
bne .skip2
|
||||
inc <lzsa_dstptr + 1
|
||||
.skip2:
|
||||
|
||||
} else {
|
||||
|
||||
beq .incsrc2
|
||||
.resume_src2:
|
||||
inc <lzsa_dstptr + 0
|
||||
beq .incdst
|
||||
.resume_dst:
|
||||
|
||||
}
|
||||
|
||||
dex
|
||||
bne .cp_page
|
||||
dec <lzsa_length ; Any full pages left to copy?
|
||||
bne .cp_page
|
||||
|
||||
} else {
|
||||
|
||||
.got_cp_len: tay ; Check the lo-byte of length.
|
||||
beq .cp_page
|
||||
|
||||
inx ; Increment # of pages to copy.
|
||||
|
||||
.get_cp_src: clc ; Calc address of partial page.
|
||||
adc <lzsa_srcptr + 0
|
||||
sta <lzsa_srcptr + 0
|
||||
bcs .get_cp_dst
|
||||
dec <lzsa_srcptr + 1
|
||||
|
||||
.get_cp_dst: tya
|
||||
clc ; Calc address of partial page.
|
||||
adc <lzsa_dstptr + 0
|
||||
sta <lzsa_dstptr + 0
|
||||
bcs .get_cp_idx
|
||||
dec <lzsa_dstptr + 1
|
||||
|
||||
.get_cp_idx: tya ; Negate the lo-byte of length.
|
||||
eor #$FF
|
||||
tay
|
||||
iny
|
||||
|
||||
.cp_page: lda (lzsa_srcptr),y
|
||||
sta (lzsa_dstptr),y
|
||||
iny
|
||||
bne .cp_page
|
||||
inc <lzsa_srcptr + 1
|
||||
inc <lzsa_dstptr + 1
|
||||
dex ; Any full pages left to copy?
|
||||
bne .cp_page
|
||||
|
||||
}
|
||||
|
||||
!if LZSA_SWAP_XZY {
|
||||
|
||||
;
|
||||
; Shorter and faster path with NEW order of bits.
|
||||
;
|
||||
; STD NEW
|
||||
; ================================
|
||||
; xyz xzy
|
||||
; 00z 0z0 5-bit offset
|
||||
; 01z 0z1 9-bit offset
|
||||
; 10z 1z0 13-bit offset
|
||||
; 110 101 16-bit offset
|
||||
; 111 111 repeat offset
|
||||
; NVZ for a BIT instruction
|
||||
;
|
||||
; N.B. Saves 3 bytes in code length.
|
||||
; get5 and get13 are 8 cycles faster.
|
||||
; get9, get16, and rep are 4 cycles faster.
|
||||
;
|
||||
|
||||
.lz_offset: lda #$20 ; Y bit in lzsa_cmdbuf.
|
||||
bit <lzsa_cmdbuf
|
||||
bmi .get_13_16_rep
|
||||
bne .get_9_bits
|
||||
|
||||
.get_5_bits: dex ; X=$FF
|
||||
.get_13_bits: +LZSA_GET_NIBL ; Always returns with CS.
|
||||
bvc .get_5_skip
|
||||
clc
|
||||
.get_5_skip: rol ; Shift into position, set C.
|
||||
cpx #$00 ; X=$FF for a 5-bit offset.
|
||||
bne .set_offset
|
||||
sbc #2 ; Subtract 512 because 13-bit
|
||||
tax ; offset starts at $FE00.
|
||||
bne .get_low8 ; Always NZ from previous TAX.
|
||||
|
||||
.get_9_bits: dex ; X=$FF if VC, X=$FE if VS.
|
||||
bvc .get_low8
|
||||
dex
|
||||
bvs .get_low8 ; Always VS from previous BIT.
|
||||
|
||||
.get_13_16_rep: beq .get_13_bits ; Shares code with 5-bit path.
|
||||
|
||||
.get_16_rep: bvs .lz_length ; Repeat previous offset.
|
||||
|
||||
} else {
|
||||
|
||||
;
|
||||
; Slower and longer path with STD order of bits.
|
||||
;
|
||||
; STD NEW
|
||||
; ================================
|
||||
; xyz xzy
|
||||
; 00z 0z0 5-bit offset
|
||||
; 01z 0z1 9-bit offset
|
||||
; 10z 1z0 13-bit offset
|
||||
; 110 101 16-bit offset
|
||||
; 111 111 repeat offset
|
||||
; NVZ for a BIT instruction
|
||||
;
|
||||
; N.B. Costs 3 bytes in code length.
|
||||
; get5 and get13 are 8 cycles slower.
|
||||
; get9, get16, and rep are 4 cycles slower.
|
||||
;
|
||||
|
||||
; Decode the offset mode from the top three token bits (standard XYZ
; order) by shifting them out one at a time.  X is 0 on entry and is
; turned into the offset hi-byte for the 5-bit and 9-bit cases.
.lz_offset: lda <lzsa_cmdbuf
|
||||
asl		; C = X bit
|
||||
bcs .get_13_16_rep
|
||||
asl		; C = Y bit
|
||||
bcs .get_9_bits
|
||||
|
||||
.get_5_bits: dex ; X=$FF
|
||||
.get_13_bits: asl	; C = Z bit
|
||||
php		; keep the Z bit across the nibble read
|
||||
+LZSA_GET_NIBL ; A = $F0 | nibble; its carry is discarded by the PLP below.
|
||||
plp
|
||||
rol ; Shift into position, set C.
|
||||
eor #$01	; invert the Z bit that landed in bit 0
|
||||
cpx #$00 ; X=$FF for a 5-bit offset.
|
||||
bne .set_offset
|
||||
sbc #2 ; Subtract 512 because 13-bit
|
||||
; offset starts at $FE00.
|
||||
bne .get_low8x ; Always NZ from previous SBC.
|
||||
|
||||
.get_9_bits: dex ; X=$FF if CC, X=$FE if CS (C = Z bit from the ASL below).
|
||||
asl
|
||||
bcc .get_low8
|
||||
dex
|
||||
bcs .get_low8 ; Always CS from previous ASL (DEX leaves carry alone).
|
||||
|
||||
.get_13_16_rep: asl	; C = Y bit
|
||||
bcc .get_13_bits ; Shares code with 5-bit path.
|
||||
|
||||
.get_16_rep: bmi .lz_length ; Repeat previous offset.
|
||||
|
||||
}
|
||||
|
||||
;
|
||||
; Copy bytes from decompressed window.
|
||||
;
|
||||
; N.B. X=0 is expected and guaranteed when we get here.
|
||||
;
|
||||
|
||||
.get_16_bits: jsr lzsa2_get_byte ; Get hi-byte of offset.
|
||||
|
||||
.get_low8x: tax
|
||||
|
||||
.get_low8:
|
||||
!if (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) {
|
||||
|
||||
+LZSA_GET_SRC ; Get lo-byte of offset.
|
||||
|
||||
} else {
|
||||
|
||||
lda (lzsa_srcptr),y
|
||||
inc <lzsa_srcptr + 0
|
||||
beq .incsrc3
|
||||
.resume_src3:
|
||||
|
||||
}
|
||||
|
||||
.set_offset: stx <lzsa_offset + 1 ; Save new offset.
|
||||
sta <lzsa_offset + 0
|
||||
|
||||
.lz_length: ldx #$00 ; Hi-byte of length.
|
||||
|
||||
lda <lzsa_cmdbuf
|
||||
and #$07
|
||||
clc
|
||||
adc #$02
|
||||
cmp #$09 ; Extended length?
|
||||
bne .got_lz_len
|
||||
|
||||
inx
|
||||
jsr .get_length ; X=1 table index for match.
|
||||
|
||||
.got_lz_len: eor #$FF ; Negate the lo-byte of length
|
||||
tay ; and check for zero.
|
||||
iny
|
||||
beq .calc_lz_addr
|
||||
eor #$FF
|
||||
|
||||
inx ; Increment # of pages to copy.
|
||||
|
||||
clc ; Calc destination for partial
|
||||
adc <lzsa_dstptr + 0 ; page.
|
||||
sta <lzsa_dstptr + 0
|
||||
bcs .calc_lz_addr
|
||||
dec <lzsa_dstptr + 1
|
||||
|
||||
.calc_lz_addr: clc ; Calc address of match.
|
||||
lda <lzsa_dstptr + 0 ; N.B. Offset is negative!
|
||||
adc <lzsa_offset + 0
|
||||
sta <lzsa_winptr + 0
|
||||
lda <lzsa_dstptr + 1
|
||||
adc <lzsa_offset + 1
|
||||
sta <lzsa_winptr + 1
|
||||
|
||||
.lz_page: lda (lzsa_winptr),y
|
||||
sta (lzsa_dstptr),y
|
||||
iny
|
||||
bne .lz_page
|
||||
inc <lzsa_winptr + 1
|
||||
inc <lzsa_dstptr + 1
|
||||
dex ; Any full pages left to copy?
|
||||
bne .lz_page
|
||||
|
||||
jmp .cp_length ; Loop around to the beginning.
|
||||
|
||||
!if (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
|
||||
|
||||
.incsrc3:
|
||||
inc <lzsa_srcptr + 1
|
||||
bne .resume_src3 ; always taken
|
||||
|
||||
}
|
||||
|
||||
;
|
||||
; Lookup tables to differentiate literal and match lengths.
|
||||
;
|
||||
|
||||
.nibl_len_tbl: !byte 3 + $10 ; 0+3 (for literal).
|
||||
!byte 9 + $10 ; 2+7 (for match).
|
||||
|
||||
.byte_len_tbl: !byte 18 - 1 ; 0+3+15 - CS (for literal).
|
||||
!byte 24 - 1 ; 2+7+15 - CS (for match).
|
||||
|
||||
;
|
||||
; Get 16-bit length in X:A register pair.
|
||||
;
|
||||
|
||||
.get_length: +LZSA_GET_NIBL
|
||||
cmp #$FF ; Extended length?
|
||||
bcs .byte_length
|
||||
adc .nibl_len_tbl,x ; Always CC from previous CMP.
|
||||
|
||||
.got_length: ldx #$00 ; Set hi-byte of 4 & 8 bit
|
||||
rts ; lengths.
|
||||
|
||||
.byte_length: jsr lzsa2_get_byte ; So rare, this can be slow!
|
||||
adc .byte_len_tbl,x ; Always CS from previous CMP.
|
||||
bcc .got_length
|
||||
beq .finished
|
||||
|
||||
!if LZSA_SWAP_LEN16 {
|
||||
|
||||
.word_length: jsr lzsa2_get_byte ; So rare, this can be slow!
|
||||
tax
|
||||
|
||||
} else {
|
||||
|
||||
.word_length: jsr lzsa2_get_byte ; So rare, this can be slow!
|
||||
pha
|
||||
jsr lzsa2_get_byte ; So rare, this can be slow!
|
||||
tax
|
||||
pla
|
||||
rts
|
||||
|
||||
}
|
||||
|
||||
lzsa2_get_byte:
|
||||
lda (lzsa_srcptr),y ; Subroutine version for when
|
||||
inc <lzsa_srcptr + 0 ; inlining isn't advantageous.
|
||||
beq lzsa2_next_page
|
||||
rts
|
||||
|
||||
lzsa2_next_page:
|
||||
inc <lzsa_srcptr + 1 ; Inc & test for bank overflow.
|
||||
!if LZSA_FROM_BANK {
|
||||
bmi lzsa2_next_bank ; Change for target hardware!
|
||||
}
|
||||
rts
|
||||
|
||||
.finished: pla ; Decompression completed, pop
|
||||
pla ; return address.
|
||||
rts
|
||||
|
||||
;
|
||||
; Get a nibble value from compressed data in A.
|
||||
;
|
||||
|
||||
!if (LZSA_SLOW_NIBL | LZSA_SMALL_SIZE) {
|
||||
|
||||
lzsa2_get_nibble:
|
||||
lsr <lzsa_nibflg ; Is there a nibble waiting?
|
||||
lda <lzsa_nibble ; Extract the lo-nibble.
|
||||
bcs .got_nibble
|
||||
|
||||
inc <lzsa_nibflg ; Reset the flag.
|
||||
!if (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) {
|
||||
|
||||
+LZSA_GET_SRC
|
||||
|
||||
} else {
|
||||
|
||||
lda (lzsa_srcptr),y
|
||||
inc <lzsa_srcptr + 0
|
||||
beq .incsrc4
|
||||
.resume_src4:
|
||||
|
||||
}
|
||||
|
||||
sta <lzsa_nibble ; Preserve for next time.
|
||||
lsr ; Extract the hi-nibble.
|
||||
lsr
|
||||
lsr
|
||||
lsr
|
||||
|
||||
!if LZSA_SWAP_XZY {
|
||||
sec ; Offset code relies on CS.
|
||||
}
|
||||
|
||||
.got_nibble: ora #$F0
|
||||
rts
|
||||
|
||||
} else {
|
||||
|
||||
lzsa2_new_nibble:
|
||||
inc <lzsa_nibflg ; Reset the flag.
|
||||
!if (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) {
|
||||
|
||||
+LZSA_GET_SRC
|
||||
|
||||
} else {
|
||||
|
||||
lda (lzsa_srcptr),y
|
||||
inc <lzsa_srcptr + 0
|
||||
beq .incsrc4
|
||||
.resume_src4:
|
||||
|
||||
}
|
||||
|
||||
sta <lzsa_nibble ; Preserve for next time.
|
||||
lsr ; Extract the hi-nibble.
|
||||
lsr
|
||||
lsr
|
||||
lsr
|
||||
|
||||
!if LZSA_SWAP_XZY {
|
||||
sec ; Offset code relies on CS.
|
||||
}
|
||||
|
||||
rts
|
||||
|
||||
}
|
||||
|
||||
!if (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
|
||||
|
||||
.incsrc4:
|
||||
inc <lzsa_srcptr + 1
|
||||
bne .resume_src4 ; always taken
|
||||
|
||||
}
|
@ -1,3 +1,5 @@
|
||||
; Note: modified by Vince Weaver to assemble with ca65
|
||||
|
||||
; -----------------------------------------------------------------------------
|
||||
; Decompress raw LZSA2 block.
|
||||
; Create one with lzsa -r -f2 <original_file> <compressed_file>
|
||||
@ -44,7 +46,12 @@
|
||||
;NIBCOUNT = $FC ; zero-page location for temp offset
|
||||
|
||||
decompress_lzsa2:
|
||||
|
||||
; page to decompress to in a
|
||||
|
||||
sta LZSA_DST_HI
|
||||
ldy #$00
|
||||
sty LZSA_DST_LO
|
||||
sty NIBCOUNT
|
||||
|
||||
decode_token:
|
||||
|
@ -13,10 +13,7 @@
|
||||
lda #>graphic_start
|
||||
sta LZSA_SRC_HI
|
||||
|
||||
lda #$0
|
||||
sta LZSA_DST_LO
|
||||
lda #$c
|
||||
sta LZSA_DST_HI
|
||||
|
||||
jsr decompress_lzsa2
|
||||
|
||||
|
112
lzsa_test/gr_unrle.s
Normal file
112
lzsa_test/gr_unrle.s
Normal file
@ -0,0 +1,112 @@
|
||||
;=================
|
||||
; load RLE image
|
||||
;=================
|
||||
; Output is A:00 (assume page aligned)
|
||||
; Input is in GBASH/GBASL
|
||||
|
||||
; format: first byte=xsize
|
||||
; A0,X,Y means run of X bytes of Y color
|
||||
; A1 means end of file
|
||||
; A2-AF,X means run of low nibble, X color
|
||||
; if high nibble not A: just display color
|
||||
|
||||
; CV = current Y
|
||||
; CH = max xsize (usually 40)
|
||||
; TEMP = page
|
||||
; TEMPY= current X
|
||||
|
||||
|
||||
load_rle_gr:
|
||||
sec
|
||||
sbc #4 ; adjust page to write to
|
||||
; to match gr_offsets
|
||||
sta TEMP
|
||||
|
||||
ldy #$0 ; init Y to 0
|
||||
sty CV
|
||||
|
||||
jsr load_and_increment ; load xsize
|
||||
sta CH
|
||||
|
||||
jsr unrle_new_y
|
||||
|
||||
|
||||
rle_loop:
|
||||
jsr load_and_increment
|
||||
|
||||
tax
|
||||
|
||||
cmp #$A1 ; if 0xa1
|
||||
beq rle_done ; we are done
|
||||
|
||||
and #$f0 ; mask
|
||||
cmp #$a0 ; see if special AX
|
||||
beq decompress_special
|
||||
|
||||
; not special, just color
|
||||
|
||||
txa ; put color back in A
|
||||
ldx #$1 ; only want to print 1
|
||||
bne decompress_run
|
||||
|
||||
; Handle an $Ax token.  The raw token byte is still in X (rle_loop did a
; tax right after reading it).  The low nibble selects the encoding:
;   $A0     : next byte is the run length, the byte after that is the color
;   $A2-$AF : low nibble is the run length, next byte is the color
; ($A1, end of file, has already been filtered out by rle_loop.)
; Leaves run length in X and color in A, then falls through to decompress_run.
decompress_special:
|
||||
txa ; put read value back in A
|
||||
|
||||
and #$0f ; isolate low nibble (zero only for A0)
|
||||
|
||||
bne decompress_color ; nonzero: low nibble already is the run length
|
||||
|
||||
decompress_large:
|
||||
jsr load_and_increment ; A0 case: run length is the next byte, now in A
|
||||
|
||||
decompress_color:
|
||||
tax ; put runlen into X
|
||||
jsr load_and_increment ; get color into A
|
||||
|
||||
; Write X copies of the byte in A to the output, wrapping to the next
; output row whenever the per-row counter TEMPY reaches zero.
;   In:  A = value to write, X = run length (must be non-zero),
;        Y = 0, BASL/BASH = current output address, TEMPY = bytes left in row
;   Out: branches back to rle_loop when the run is finished
; Only the low byte of the output pointer (BASL) is advanced here.
; NOTE(review): this presumably relies on a row never crossing a page
; boundary -- confirm against gr_offsets.
decompress_run:
|
||||
rle_run_loop:
|
||||
sta (BASL),y ; write out the value
|
||||
inc BASL
|
||||
dec TEMPY
|
||||
bne rle_not_eol ; if bytes remain in this row, keep going
|
||||
|
||||
; if here, we have filled the row (TEMPY reached zero)
|
||||
|
||||
inc CV
|
||||
inc CV
|
||||
pha
|
||||
jsr unrle_new_y
|
||||
pla
|
||||
|
||||
rle_not_eol:
|
||||
dex
|
||||
bne rle_run_loop ; if not zero, keep looping
|
||||
|
||||
beq rle_loop ; and branch always
|
||||
|
||||
rle_done:
|
||||
lda #$15 ; move the cursor somewhere sane
|
||||
sta CV
|
||||
rts
|
||||
|
||||
|
||||
; Fetch the next input byte and advance the 16-bit input pointer.
;   In:  GBASL/GBASH = input pointer, Y = index added to it (callers keep Y = 0)
;   Out: A = byte read, pointer incremented by one
; The flags on return come from the last inc, not from the byte loaded
; into A, so callers must not test Z/N to examine the value.
load_and_increment:
|
||||
lda (GBASL),Y
|
||||
inc GBASL
|
||||
bne lai_no_oflo
|
||||
inc GBASH
|
||||
lai_no_oflo:
|
||||
rts
|
||||
|
||||
; Point the output at the start of the row selected by CV and reset the
; per-row byte counter.
;   In:  CV   = byte offset into gr_offsets (two bytes per entry: low, high)
;        TEMP = page adjustment added to the high byte
;        CH   = row width
;   Out: BASL/BASH = output address for the row, TEMPY = CH, Y = 0
;   Clobbers: A and flags.  X is not touched.
unrle_new_y:
|
||||
ldy CV
|
||||
lda gr_offsets,Y
|
||||
sta BASL
|
||||
lda gr_offsets+1,Y
|
||||
clc
|
||||
adc TEMP ; adjust for page
|
||||
sta BASH
|
||||
lda CH
|
||||
sta TEMPY
|
||||
ldy #0
|
||||
rts
|
@ -1,2 +1,2 @@
|
||||
5 HOME
|
||||
120 PRINT CHR$(4);"BRUN GR_LZSA"
|
||||
120 PRINT CHR$(4);"CATALOG"
|
||||
|
50
lzsa_test/lz4_bench.s
Normal file
50
lzsa_test/lz4_bench.s
Normal file
@ -0,0 +1,50 @@
|
||||
|
||||
.include "zp.inc"
|
||||
.include "hardware.inc"
|
||||
|
||||
lda #0
|
||||
sta DRAW_PAGE
|
||||
|
||||
bit SET_GR
|
||||
bit PAGE0
|
||||
|
||||
bit KEYRESET
|
||||
pause_loop:
|
||||
lda KEYPRESS
|
||||
bpl pause_loop
|
||||
|
||||
lda #<graphic_start
|
||||
sta LZ4_SRC
|
||||
lda #>graphic_start
|
||||
sta LZ4_SRC+1
|
||||
|
||||
lda #<graphic_end
|
||||
sta LZ4_END
|
||||
lda #>graphic_end
|
||||
sta LZ4_END+1
|
||||
|
||||
lda #$0
|
||||
sta LZ4_DST
|
||||
lda #$c
|
||||
sta LZ4_DST+1
|
||||
|
||||
before:
|
||||
|
||||
jsr lz4_decode
|
||||
after:
|
||||
|
||||
jsr gr_copy_to_current
|
||||
|
||||
|
||||
blah:
|
||||
jmp blah
|
||||
|
||||
|
||||
.include "lz4_decode.s"
|
||||
.include "gr_copy.s"
|
||||
.include "gr_offsets.s"
|
||||
|
||||
graphic_start:
|
||||
|
||||
.incbin "spaceship_far_n.lz4"
|
||||
graphic_end:
|
201
lzsa_test/lz4_decode.s
Normal file
201
lzsa_test/lz4_decode.s
Normal file
@ -0,0 +1,201 @@
|
||||
; LZ4 data decompressor for Apple II
|
||||
|
||||
; Code by Peter Ferrie (qkumba) (peter.ferrie@gmail.com)
|
||||
; "LZ4 unpacker in 143 bytes (6502 version) (2013)"
|
||||
; http://pferrie.host22.com/misc/appleii.htm
|
||||
; This is that code, but with comments and labels added for clarity.
|
||||
; I also found a bug when decoding with runs of multiples of 256
|
||||
; which has since been fixed upstream.
|
||||
|
||||
; For LZ4 reference see
|
||||
; https://github.com/lz4/lz4/wiki/lz4_Frame_format.md
|
||||
|
||||
; LZ4 summary:
|
||||
;
|
||||
; HEADER:
|
||||
; Should: check for magic number 04 22 4d 18
|
||||
; FLG: 64 in our case (01=version, block.index=1, block.checksum=0
|
||||
; size=0, checksum=1, reserved
|
||||
; MAX Blocksize: 40 (64kB)
|
||||
; HEADER CHECKSUM: a7
|
||||
; BLOCK HEADER: 4 bytes (le) length If highest bit set, uncompressed!
|
||||
; data (see below), followed by checksum?
|
||||
; BLOCKS:
|
||||
; Token byte. High 4-bits literal length, low 4-bits copy length
|
||||
; + If literal length==15, then following byte gets added to length
|
||||
; If that byte was 255, then keep adding bytes until not 255
|
||||
; + The literal bytes follow. There may be zero of them
|
||||
; + Next is block copy info. little-endian 2-byte offset to
|
||||
; be subtracted from current read position indicating source
|
||||
; + The low 4-bits of the token are the copy length, which needs
|
||||
; 4 added to it. As with the literal length, if it is 15 then
|
||||
; you read a byte and add (and if that byte is 255, keep adding)
|
||||
|
||||
; At end you have 4 byte end-of-block marker (all zeros?) then
|
||||
; 4 bytes of checksum (if marked in flags)
|
||||
; our code does that, so be sure to set end -8
|
||||
|
||||
|
||||
;LZ4_SRC EQU $00
|
||||
;LZ4_DST EQU $02
|
||||
;LZ4_END EQU $04
|
||||
;COUNT EQU $06
|
||||
;DELTA EQU $08
|
||||
|
||||
|
||||
;======================
|
||||
; LZ4 decode
|
||||
;======================
|
||||
; input buffer in LZ4_SRC
|
||||
; end of input in LZ4_END
|
||||
; output buffer in LZ4_DST
|
||||
|
||||
|
||||
lz4_decode:
|
||||
|
||||
|
||||
unpmain:
|
||||
ldy #0 ; used to index, always zero
|
||||
|
||||
parsetoken:
|
||||
jsr getsrc ; get next token
|
||||
pha ; save for later (need bottom 4 bits)
|
||||
|
||||
lsr ; number of literals in top 4 bits
|
||||
lsr ; so shift into place
|
||||
lsr
|
||||
lsr
|
||||
beq copymatches ; if zero, then no literals
|
||||
; jump ahead and copy
|
||||
|
||||
jsr buildcount ; add up all the literal sizes
|
||||
; result is in ram[count+1]-1:A
|
||||
tax ; now in ram[count+1]-1:X
|
||||
jsr docopy ; copy the literals
|
||||
|
||||
lda LZ4_SRC ; 16-bit compare
|
||||
cmp LZ4_END ; to see if we have reached the end
|
||||
lda LZ4_SRC+1
|
||||
sbc LZ4_END+1
|
||||
bcs done
|
||||
|
||||
; Decode the match part of a sequence: read the 16-bit little-endian
; offset into DELTA, then build the match length from the low nibble of
; the token (saved on the stack by parsetoken) plus any extension bytes,
; plus the fixed minimum of 4.  On fall-through into copy_no_adjust the
; length is held as X (low byte) with COUNT+1 as the biased high byte
; that docopy expects.
copymatches:
|
||||
jsr getsrc ; get 16-bit delta value
|
||||
sta DELTA
|
||||
jsr getsrc
|
||||
sta DELTA+1
|
||||
|
||||
pla ; restore token
|
||||
and #$0f ; get bottom 4 bits
|
||||
; match count. 0 means 4
|
||||
; 15 means 19+, must be calculated
|
||||
|
||||
jsr buildcount ; add up count bits, in ram[count+1]-1:A
|
||||
|
||||
clc
|
||||
adc #4 ; adjust count by 4 (minmatch)
|
||||
|
||||
tax ; now in ram[count+1]-1:X (tax sets Z, leaves carry from adc)
|
||||
|
||||
beq copy_no_adjust ; BUGFIX, don't increment if
|
||||
; exactly a multiple of 0x100
|
||||
bcc copy_no_adjust
|
||||
|
||||
inc COUNT+1 ; increment if we overflowed
|
||||
copy_no_adjust:
|
||||
|
||||
lda LZ4_SRC+1 ; save src on stack
|
||||
pha
|
||||
lda LZ4_SRC
|
||||
pha
|
||||
|
||||
sec ; subtract delta
|
||||
lda LZ4_DST ; from destination, make new src
|
||||
sbc DELTA
|
||||
sta LZ4_SRC
|
||||
lda LZ4_DST+1
|
||||
sbc DELTA+1
|
||||
sta LZ4_SRC+1
|
||||
|
||||
jsr docopy ; do the copy
|
||||
|
||||
pla ; restore the src
|
||||
sta LZ4_SRC
|
||||
pla
|
||||
sta LZ4_SRC+1
|
||||
|
||||
jmp parsetoken ; back to parsing tokens
|
||||
|
||||
done:
|
||||
pla
|
||||
rts
|
||||
|
||||
;=========
|
||||
; getsrc
|
||||
;=========
|
||||
; gets byte from src into A, increments pointer
|
||||
;   In:  LZ4_SRC (16-bit) = read pointer, Y = 0 (set once at unpmain)
;   Out: A = byte read, LZ4_SRC advanced by one
; Flags on return reflect the pointer increment, not the byte in A.
getsrc:
|
||||
lda (LZ4_SRC), Y ; get a byte from src
|
||||
inc LZ4_SRC ; increment pointer
|
||||
bne done_getsrc ; update 16-bit pointer
|
||||
inc LZ4_SRC+1 ; on 8-bit overflow
|
||||
done_getsrc:
|
||||
rts
|
||||
|
||||
;============
|
||||
; buildcount
|
||||
;============
|
||||
; Expand a 4-bit length nibble into a full length.
;   In:  A = nibble (0-15) taken from the token
;   Out: A = low byte of the length, COUNT+1 = high byte + 1
;   Clobbers: X, COUNT
; If the nibble is 15, extension bytes are read from the source and added
; in; a byte of 255 means another extension byte follows.
; NOTE(review): if the summed low byte ends up exactly 0, the carry has
; already bumped COUNT+1, and docopy copies a further 256 bytes when
; entered with X = 0.  The match path guards only its own +4 adjustment;
; the literal path (tax / jsr docopy) looks unguarded for literal runs
; that are an exact multiple of 256 -- confirm.
buildcount:
|
||||
ldx #1 ; high count starts at 1
|
||||
stx COUNT+1 ; (docopy's dec/bne stops when this reaches zero)
|
||||
cmp #$0f ; if LITERAL_COUNT < 15, we are done
|
||||
bne done_buildcount
|
||||
buildcount_loop:
|
||||
sta COUNT ; save running low byte (15 on the first pass)
|
||||
jsr getsrc ; get the next byte
|
||||
tax ; put in X
|
||||
clc
|
||||
adc COUNT ; add new byte to old value
|
||||
bcc bc_8bit_oflow ; no carry: skip the high byte increment
|
||||
inc COUNT+1
|
||||
bc_8bit_oflow:
|
||||
inx ; check if read value was 255
|
||||
beq buildcount_loop ; if it was, keep looping and adding
|
||||
done_buildcount:
|
||||
rts
|
||||
|
||||
;============
|
||||
; getput
|
||||
;============
|
||||
; gets a byte, then puts the byte
|
||||
; Copies one byte: reads it via getsrc (advancing LZ4_SRC), then falls
; into putdst to store it and advance LZ4_DST.  Expects Y = 0.
getput:
|
||||
jsr getsrc
|
||||
; fallthrough to putdst
|
||||
|
||||
;=============
|
||||
; putdst
|
||||
;=============
|
||||
; store A into destination
|
||||
;   In:  A = byte to store, LZ4_DST (16-bit) = write pointer, Y = 0
|
||||
;   Out: LZ4_DST advanced by one; A unchanged, flags reflect the increment
|
||||
putdst:
|
||||
sta (LZ4_DST), Y ; store A into destination
|
||||
inc LZ4_DST ; increment 16-bit pointer
|
||||
bne putdst_end ; if overflow, increment top byte
|
||||
inc LZ4_DST+1
|
||||
putdst_end:
|
||||
rts
|
||||
|
||||
;=============================
|
||||
; docopy
|
||||
;=============================
|
||||
; copies ram[count+1]-1:X bytes
|
||||
; from src to dst
|
||||
;   In:  X = low byte of count, COUNT+1 = high byte of count + 1
;   Out: X = 0, COUNT+1 = 0, LZ4_SRC and LZ4_DST advanced
; At least one byte is always copied.  When X is non-zero on entry the
; total is X + 256*(COUNT+1 - 1); when X is zero on entry the first pass
; wraps through 256 iterations, giving 256*(COUNT+1) bytes.
docopy:
|
||||
|
||||
docopy_loop:
|
||||
jsr getput ; get/put byte
|
||||
dex ; decrement count
|
||||
bne docopy_loop ; if not zero, loop
|
||||
dec COUNT+1 ; if zero, decrement high byte
|
||||
bne docopy_loop ; if not zero, loop
|
||||
|
||||
rts
|
40
lzsa_test/lzsa2_fast_bench.s
Normal file
40
lzsa_test/lzsa2_fast_bench.s
Normal file
@ -0,0 +1,40 @@
|
||||
|
||||
.include "zp.inc"
|
||||
.include "hardware.inc"
|
||||
|
||||
lda #0
|
||||
sta DRAW_PAGE
|
||||
|
||||
bit SET_GR
|
||||
bit PAGE0
|
||||
|
||||
bit KEYRESET
|
||||
pause_loop:
|
||||
lda KEYPRESS
|
||||
bpl pause_loop
|
||||
|
||||
lda #<graphic_start
|
||||
sta LZSA_SRC_LO
|
||||
lda #>graphic_start
|
||||
sta LZSA_SRC_HI
|
||||
|
||||
before:
|
||||
lda #$c
|
||||
jsr decompress_lzsa2_fast
|
||||
after:
|
||||
|
||||
jsr gr_copy_to_current
|
||||
|
||||
|
||||
blah:
|
||||
jmp blah
|
||||
|
||||
|
||||
.include "decompress_fast_v2.s"
|
||||
.include "gr_copy.s"
|
||||
.include "gr_offsets.s"
|
||||
|
||||
graphic_start:
|
||||
|
||||
.incbin "spaceship_far_n.gr.small_v2"
|
||||
graphic_end:
|
40
lzsa_test/lzsa2_small_bench.s
Normal file
40
lzsa_test/lzsa2_small_bench.s
Normal file
@ -0,0 +1,40 @@
|
||||
|
||||
.include "zp.inc"
|
||||
.include "hardware.inc"
|
||||
|
||||
lda #0
|
||||
sta DRAW_PAGE
|
||||
|
||||
bit SET_GR
|
||||
bit PAGE0
|
||||
|
||||
bit KEYRESET
|
||||
pause_loop:
|
||||
lda KEYPRESS
|
||||
bpl pause_loop
|
||||
|
||||
lda #<graphic_start
|
||||
sta LZSA_SRC_LO
|
||||
lda #>graphic_start
|
||||
sta LZSA_SRC_HI
|
||||
|
||||
before:
|
||||
lda #$c
|
||||
jsr decompress_lzsa2
|
||||
after:
|
||||
|
||||
jsr gr_copy_to_current
|
||||
|
||||
|
||||
blah:
|
||||
jmp blah
|
||||
|
||||
|
||||
.include "decompress_small_v2.s"
|
||||
.include "gr_copy.s"
|
||||
.include "gr_offsets.s"
|
||||
|
||||
graphic_start:
|
||||
|
||||
.incbin "spaceship_far_n.gr.small_v2"
|
||||
graphic_end:
|
@ -5,3 +5,10 @@ lz4 323
|
||||
|
||||
lzsa -r -f1 -- small_v1 -- 252 bytes
|
||||
lzsa -r -f2 -- small_v2 -- 228 bytes
|
||||
|
||||
|
||||
speed:
|
||||
rle: 8AE7 cycles 687 byte exe
|
||||
lzsa_small_v2: E8EF cycles 782 byte exe
|
||||
lzsa_fast_v2: DFBF cycles 793 byte exe
|
||||
lz4: 1170A cycles 767 byte exe
|
||||
|
65
lzsa_test/rle_bench.s
Normal file
65
lzsa_test/rle_bench.s
Normal file
@ -0,0 +1,65 @@
|
||||
|
||||
.include "zp.inc"
|
||||
.include "hardware.inc"
|
||||
|
||||
lda #0
|
||||
sta DRAW_PAGE
|
||||
|
||||
bit SET_GR
|
||||
bit PAGE0
|
||||
|
||||
bit KEYRESET
|
||||
pause_loop:
|
||||
lda KEYPRESS
|
||||
bpl pause_loop
|
||||
|
||||
lda #<graphic_start
|
||||
sta GBASL
|
||||
lda #>graphic_start
|
||||
sta GBASH
|
||||
|
||||
before:
|
||||
lda #$c
|
||||
jsr load_rle_gr
|
||||
|
||||
after:
|
||||
|
||||
jsr gr_copy_to_current
|
||||
|
||||
|
||||
blah:
|
||||
jmp blah
|
||||
|
||||
|
||||
.include "gr_unrle.s"
|
||||
.include "gr_copy.s"
|
||||
.include "gr_offsets.s"
|
||||
|
||||
graphic_start:
|
||||
spaceship_far_n_rle: .byte $28 ; ysize=48
|
||||
.byte $A0,$FF,$FF, $AF,$FF, $A7,$5F, $A3,$F5, $A0,$1B,$FF, $5F, $05,$05
|
||||
.byte $A4,$00, $0F,$0F, $A0,$1C,$FF, $50, $5F, $05, $A5,$00
|
||||
.byte $F0, $A0,$11,$FF, $0F, $9F,$9F, $AB,$99, $55, $0A
|
||||
.byte $90,$90, $A3,$00, $F0, $AE,$FF, $0F, $9F, $D9
|
||||
.byte $00, $A6,$DD, $A4,$88, $A3,$DD, $88, $55, $D9
|
||||
.byte $00, $A3,$D9, $D0, $9F, $A8,$FF, $A5,$0F, $00,$00
|
||||
.byte $8D,$8D, $00, $A6,$8D, $A4,$88, $A4,$8D, $55, $8D
|
||||
.byte $00, $A7,$8D, $AB,$FF, $F0, $00, $88,$88, $00
|
||||
.byte $AB,$88, $08,$08, $88, $55, $88, $00, $A6,$88
|
||||
.byte $F8, $AE,$FF, $F8, $F0, $A6,$08, $A5,$88, $99,$99
|
||||
.byte $88, $55, $08, $A7,$00, $0F,$0F, $A0,$13,$FF, $A7,$50
|
||||
.byte $59,$59, $58, $55, $5F, $A0,$01,$AF, $A3,$FF, $A9,$F0
|
||||
.byte $FF, $AC,$7F, $4F, $45,$45, $AA,$75, $55, $05
|
||||
.byte $0A, $AC,$7F, $AC,$77, $A3,$44, $00,$00, $44, $54
|
||||
.byte $A3,$55, $A3,$44, $A3,$00, $A0,$18,$77, $44, $FF, $44
|
||||
.byte $00,$00, $44, $A4,$55, $A3,$44, $00, $FF, $00
|
||||
.byte $A0,$18,$77, $A3,$44, $00,$00, $44, $A4,$55, $A3,$44, $A3,$00
|
||||
.byte $A0,$18,$77, $A3,$44, $00,$00, $A6,$55, $44,$44, $A3,$00, $A0,$18,$77
|
||||
.byte $A3,$44, $00,$00, $A6,$55, $54, $44, $A3,$00, $A4,$77
|
||||
.byte $07, $A3,$77, $A3,$07, $AD,$77, $A3,$44, $00, $A8,$55
|
||||
.byte $44, $A3,$00, $A3,$77, $07, $00, $88,$88, $07
|
||||
.byte $A3,$00, $07, $AC,$77, $A3,$44, $00, $A9,$55, $A3,$00
|
||||
.byte $A3,$77, $A3,$00, $A6,$88, $AC,$77, $A3,$44, $AA,$55, $A3,$00
|
||||
.byte $77,$77, $57, $A3,$00, $A6,$88
|
||||
.byte $A1
|
||||
; cycles=7669
|
BIN
lzsa_test/spaceship_far_n.png
Normal file
BIN
lzsa_test/spaceship_far_n.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 416 B |
Loading…
Reference in New Issue
Block a user