67 Commits
1.2.2 ... 1.3.4

Author SHA1 Message Date
01228f3eeb Bump version 2020-08-18 11:57:25 +02:00
e3fd315541 Faster LZSA1 compression 2020-08-18 11:51:24 +02:00
5a0da16874 Increase LZSA2 ratio for some input files 2020-08-18 09:13:54 +02:00
028007b57c Bump version 2020-08-02 09:40:31 +02:00
4682b2e917 Small simplification 2020-07-29 15:23:22 +02:00
060f5d3350 Simplify code, compress LZSA2 another 15% faster 2020-07-29 13:01:24 +02:00
33eec56b9b Bump version 2020-07-27 15:36:01 +02:00
90fa770458 Compress another 8% faster 2020-07-27 13:25:16 +02:00
b2971da2b4 Nicer code 2020-07-26 16:38:22 +02:00
3fb9dc54b1 Compress LZSA2 another 3% faster 2020-07-26 10:07:03 +02:00
00d1d95625 Small improvement 2020-07-24 19:18:46 +02:00
e4f013f2db Compress LZSA2 17% faster 2020-07-24 17:14:01 +02:00
703ff19a3a Bump version 2020-07-14 23:50:44 +02:00
fc5081fb1a Rename confusing definitions 2020-07-14 22:36:38 +02:00
61698b5036 Another LZSA2 compression speedup 2020-07-14 17:01:07 +02:00
cf49af5cda Faster LZSA2 compression 2020-07-14 12:36:56 +02:00
c39158eea8 Compress LZSA2 faster, clean code up 2020-07-13 19:34:07 +02:00
4864f3c184 Compress LZSA1 a little faster 2020-07-10 17:45:13 +02:00
8ed768aafc Nicer and faster code 2020-07-10 08:55:45 +02:00
9c7495f458 Compress LZSA2 ~12% faster 2020-07-06 12:47:56 +02:00
afbb1de16c Merge pull request #48 from dougmasten/dev
More optimizations to the 6809 LZSA depackers
2020-06-28 15:46:17 +02:00
078edef880 Optimize match offset code in 6809 LZSA2 depacker 2020-06-27 04:17:05 -05:00
03692fca2c Update code byte counts for 6809 LZSA depackers 2020-06-27 02:02:33 -05:00
39e11422ec Delay clearing high part of literals count until we really have to in 6809 LZSA1 depacker 2020-06-27 02:02:33 -05:00
fde15d3fb0 Move instruction before branch to save one byte in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
fc8120f0da Optimize handling of 9 bits offset in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
c7b3ffc067 Delay clearing high part of literals count until we really have to in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
137c6201be One byte saving for setting reg A to $FF in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
e397428c1f Remove trailing whitespaces in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
b8cfbbbc7b Optimize handling of token's Z flag bit in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
28ca829924 delay clearing high part of literals count until we really have to in 6809 LZSA1 depacker 2020-06-27 02:02:33 -05:00
27562f4761 Restructure code to eliminate one BRA instruction from loop in 6809 LZSA1 depacker 2020-06-27 02:02:33 -05:00
0307d228a0 Merge pull request #3 from dougmasten/master
Merge pull request
2020-06-26 17:13:02 -05:00
e8b2ebb89f Merge pull request #2 from emmanuel-marty/master
Merge pull request
2020-06-26 17:08:44 -05:00
f72133f4cf Move instruction before branch to save one byte in 6809 LZSA2 depacker 2020-06-26 16:48:58 -05:00
56ba563794 One byte saving for setting reg A to $FF in 6809 LZSA2 depacker 2020-06-26 15:59:24 -05:00
c0f09db364 Delay clearing high part of literals count until we really have to in 6809 LZSA2 depacker 2020-06-26 15:56:28 -05:00
99db30a732 Optimize handling of 9 bits offset in 6809 LZSA2 depacker 2020-06-26 15:18:53 -05:00
061ca99838 Optimize handling of token's Z flag bit in 6809 LZSA2 depacker 2020-06-26 14:52:09 -05:00
7b96368469 Optimize match offset code in 6809 LZSA2 depacker 2020-06-26 14:42:36 -05:00
e9540b2e3d Remove unnecessary "ADDB #$18" as register B will always have this value from 6809 LZSA2 depacker 2020-06-24 00:49:09 -05:00
40212975c2 Bump version 2020-06-22 10:08:40 +02:00
07c3969432 Compress LZSA2 raw files one byte shorter 2020-06-22 00:13:14 +02:00
6a47ed7f41 Remove unnecessary "ADDB #$18" as B register will always have this value from 6809 LZSA depacker 2020-06-21 12:32:54 -05:00
06d63de9d7 Rearrange "CLRA" instruction for slight speed optimization in 6809 LZSA depacker 2020-06-21 12:16:07 -05:00
b5b8ca556a Rearrange match offset code to save 2 bytes in 6809 LZSA depacker 2020-06-21 01:09:15 -05:00
f724663ba8 Update README some more 2020-06-21 00:04:03 +02:00
798c07f6e0 Update README again 2020-06-20 23:59:01 +02:00
2f79779beb Update README 2020-06-20 23:57:22 +02:00
a9b5a7511c Merge pull request #47 from dougmasten/dom
Various optimizations to the 6809 LZSA depackers
2020-06-20 21:24:28 +02:00
7f0316b81c Update code byte counts for 6809 LZSA depackers 2020-06-20 12:45:51 -05:00
e8edc3242d Restructure code to eliminate one BRA instruction in unlzsa1.s 6809 LZSA depacker 2020-06-20 12:27:31 -05:00
1dd65731c4 Remove unnecessary "ADDB #$12" as B register will always have this value from 6809 LZSA depackers 2020-06-20 12:27:22 -05:00
f233d552ca Remove unnecessary "TSTB" instruction from 6809 LZSA depackers 2020-06-20 12:11:35 -05:00
8c99570b06 Merge pull request #1 from emmanuel-marty/master
Add backward depackers for 6809
2020-06-20 12:03:30 -05:00
ee969968c1 Add backward depackers for 6809 2020-06-20 14:41:25 +02:00
f920485899 -3 bytes for 6809 LZSA2 depacker, slightly faster 2020-06-19 20:08:09 +02:00
2cdeda4784 -2 bytes for 6809 LZSA1 depacker, slightly faster 2020-06-19 20:07:41 +02:00
f198431a71 -2 bytes for 6809 LZSA2 depacker, speed unchanged 2020-06-19 18:26:25 +02:00
a6a69ebe17 Mention 6809 decompressors 2020-06-19 12:55:21 +02:00
f80706ff7e -5 bytes and faster 6809 LZSA2 depacker 2020-06-19 12:50:10 +02:00
c3b111cea6 -2 bytes and faster 6809 LZSA1 depacker 2020-06-19 12:49:46 +02:00
e253d3e628 -1 byte for 6809 LZSA2 depacker 2020-06-19 08:57:01 +02:00
26bf944d9f Add 6809 depacker for LZSA2 2020-06-18 17:48:08 +02:00
b88d3465e3 Compress LZSA2 a tiny bit faster 2020-06-18 17:47:31 +02:00
1d0427ecae Make 6809 depacker for LZSA1 smaller/faster 2020-06-18 17:46:42 +02:00
3ce9a2b36e Add 6809 depacker for LZSA1 2020-06-18 13:26:25 +02:00
10 changed files with 1054 additions and 340 deletions

View File

@ -71,9 +71,11 @@ License:
8-bit assembly code:
* Z80 decompressors (size- and speed-optimized) written by [introspec](https://github.com/specke)
* Z80 decompressors (size- and speed-optimized) written by [introspec](https://github.com/specke) with optimizations by [uniabis](https://github.com/uniabis)
* 6502 and 8088 size-optimized improvements by [Peter Ferrie](https://github.com/peterferrie)
* 6502 speed-optimized decompressor by [John Brandwood](https://github.com/jbrandwood)
* 8088 speed-optimized decompressor by [Jim Leonard](https://github.com/mobygamer)
* 6809 decompressors (Tandy CoCo, Thomson MO/TO, Dragon 32/64..) optimized by [Doug Masten](https://github.com/dougmasten)
External links:

104
asm/6809/unlzsa1.s Normal file
View File

@ -0,0 +1,104 @@
; unlzsa1.s - 6809 decompression routine for raw LZSA1 - 110 bytes
; compress with lzsa -r <original_file> <compressed_file>
;
; in: x = start of compressed data
; y = start of decompression buffer
; out: y = end of decompression buffer + 1
;
; Copyright (C) 2020 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
decompress_lzsa1 equ lz1token
lz1bigof lda ,x+ ; O set: load MSB of 16-bit (negative, signed) offset
lz1gotof leau d,y ; put backreference start address in U (dst+offset)
puls b ; restore token
clra ; clear A (high part of match length)
andb #$0F ; isolate MMMM (embedded match length)
addb #$03 ; add MIN_MATCH_SIZE
cmpb #$12 ; MATCH_RUN_LEN?
bne lz1gotln ; no, we have the full match length, go copy
addb ,x+ ; add extra match length byte + MIN_MATCH_SIZE + MATCH_RUN_LEN
bcc lz1gotln ; if no overflow, we have the full length
bne lz1midln ; B=high part of len, low byte follows: go grab it
ldb ,x+ ; load 16-bit len in D (low part in B, high in A)
lda ,x+ ; (little endian)
bne lz1gotln ; high byte nonzero: not EOD, go copy matched bytes
tstb ; also test low byte (EOD is 16-bit length = 0)
bne lz1gotln ; go copy matched bytes if not
rts ; done, bail
lz1midln tfr b,a ; copy high part of len into A
ldb ,x+ ; grab low 8 bits of len in B
lz1gotln pshs x ; save source compressed data pointer
tfr d,x ; copy match length to X
lz1cpymt lda ,u+ ; copy matched byte
sta ,y+ ; store it at the current output position
leax -1,x ; decrement X
bne lz1cpymt ; loop until all matched bytes are copied
puls x ; restore source compressed data pointer
lz1token ldb ,x+ ; load next token into B: O|LLL|MMMM
pshs b ; save it
andb #$70 ; isolate LLL (embedded literals count) in B
beq lz1nolt ; skip if no literals
cmpb #$70 ; LITERALS_RUN_LEN?
bne lz1declt ; if not, we have the complete count, go unshift
ldb ,x+ ; load extra literals count byte
addb #$07 ; add LITERALS_RUN_LEN
bcc lz1gotla ; if no overflow, we got the complete count, copy
bne lz1midlt ; B=high part of count, low byte follows: go grab it
ldb ,x+ ; load low 8 bits of little-endian literals count
lda ,x+ ; load high 8 bits of literal count
bra lz1gotlt ; we now have the complete count, go copy
lz1midlt tfr b,a ; copy high part of literals count into A
ldb ,x+ ; load low 8 bits of literals count
bra lz1gotlt ; we now have the complete count, go copy
lz1declt lsrb ; shift literals count into place
lsrb ; (move LLL from bits 4-6
lsrb ; down to bits 0-2)
lsrb
lz1gotla clra ; clear A (high part of literals count)
lz1gotlt tfr x,u ; copy literals source (compressed stream ptr) to U
tfr d,x ; transfer 16-bit count into X
lz1cpylt lda ,u+ ; copy literal byte
sta ,y+ ; store it at the current output position
leax -1,x ; decrement X and update Z flag
bne lz1cpylt ; loop until all literal bytes are copied
tfr u,x ; restore compressed stream pointer past the literals
lz1nolt ldb ,x+ ; load either 8-bit or LSB 16-bit offset (negative, signed)
lda ,s ; get token again, don't pop it from the stack
bmi lz1bigof ; test O bit (small or large offset)
lda #$ff ; O clear: short offset, set high 8 bits to $FF
bra lz1gotof ; go resolve the backreference

106
asm/6809/unlzsa1b.s Normal file
View File

@ -0,0 +1,106 @@
; unlzsa1b.s - 6809 backward decompression routine for raw LZSA1 - 113 bytes
; compress with lzsa -r -b <original_file> <compressed_file>
;
; in: x = last byte of compressed data
; y = last byte of decompression buffer
; out: y = first byte of decompressed data
;
; Copyright (C) 2020 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
decompress_lzsa1
leax 1,x ; predecrement addressing: point one past last input byte
leay 1,y ; and one past last output byte
bra lz1token
lz1bigof ldd ,--x ; O set: load long 16 bit (negative, signed) offset
lz1gotof nega ; reverse sign of offset in D
negb ; (negate 16-bit value: negate both bytes,
sbca #0 ; then propagate the borrow into the high byte)
leau d,y ; put backreference start address in U (dst+offset)
puls b ; restore token
clra ; clear A (high part of match length)
andb #$0F ; isolate MMMM (embedded match length)
addb #$03 ; add MIN_MATCH_SIZE
cmpb #$12 ; MATCH_RUN_LEN?
bne lz1gotln ; no, we have the full match length, go copy
addb ,-x ; add extra match length byte + MIN_MATCH_SIZE + MATCH_RUN_LEN
bcc lz1gotln ; if no overflow, we have the full length
bne lz1midln ; B=high part of len, low byte follows: go grab it
ldd ,--x ; load 16-bit len in D (low part in B, high in A)
bne lz1gotln ; check if we hit EOD (16-bit length = 0)
rts ; done, bail
lz1midln tfr b,a ; copy high part of len into A
ldb ,-x ; grab low 8 bits of len in B
lz1gotln pshs x ; save source compressed data pointer
tfr d,x ; copy match length to X
lz1cpymt lda ,-u ; copy matched byte (walking backward)
sta ,-y ; store it, walking the output backward too
leax -1,x ; decrement X
bne lz1cpymt ; loop until all matched bytes are copied
puls x ; restore source compressed data pointer
lz1token ldb ,-x ; load next token into B: O|LLL|MMMM
pshs b ; save it
andb #$70 ; isolate LLL (embedded literals count) in B
beq lz1nolt ; skip if no literals
cmpb #$70 ; LITERALS_RUN_LEN?
bne lz1declt ; if not, we have the complete count, go unshift
ldb ,-x ; load extra literals count byte
addb #$07 ; add LITERALS_RUN_LEN
bcc lz1gotla ; if no overflow, we got the complete count, copy
bne lz1midlt ; B=high part of count, low byte follows: go grab it
ldd ,--x ; load 16 bit count in D (low part in B, high in A)
bra lz1gotlt ; we now have the complete count, go copy
lz1midlt tfr b,a ; copy high part of literals count into A
ldb ,-x ; load low 8 bits of literals count
bra lz1gotlt ; we now have the complete count, go copy
lz1declt lsrb ; shift literals count into place
lsrb ; (move LLL from bits 4-6
lsrb ; down to bits 0-2)
lsrb
lz1gotla clra ; clear A (high part of literals count)
lz1gotlt tfr x,u ; copy literals source (compressed stream ptr) to U
tfr d,x ; transfer 16-bit count into X
lz1cpylt lda ,-u ; copy literal byte (walking backward)
sta ,-y ; store it, walking the output backward too
leax -1,x ; decrement X and update Z flag
bne lz1cpylt ; loop until all literal bytes are copied
tfr u,x ; restore compressed stream pointer past the literals
lz1nolt ldb ,s ; get token again, don't pop it from the stack
bmi lz1bigof ; test O bit (small or large offset)
ldb ,-x ; O clear: load 8 bit (negative, signed) offset
lda #$ff ; set high 8 bits
bra lz1gotof ; go negate the offset and resolve the backreference

149
asm/6809/unlzsa2.s Normal file
View File

@ -0,0 +1,149 @@
; unlzsa2.s - 6809 decompression routine for raw LZSA2 - 172 bytes
; compress with lzsa -f2 -r <original_file> <compressed_file>
;
; in: x = start of compressed data
; y = start of decompression buffer
; out: y = end of decompression buffer + 1
;
; Copyright (C) 2020 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
decompress_lzsa2
lsr <lz2nibct,pcr ; reset nibble available flag
lz2token ldb ,x+ ; load next token into B: XYZ|LL|MMM
pshs b ; save it
andb #$18 ; isolate LL (embedded literals count) in B
beq lz2nolt ; skip if no literals
cmpb #$18 ; LITERALS_RUN_LEN_V2?
bne lz2declt ; if not, we have the complete count, go unshift
bsr lz2nibl ; get extra literals length nibble in B
addb #$03 ; add LITERALS_RUN_LEN_V2
cmpb #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
bne lz2gotla ; if not, we have the full literals count, go copy
addb ,x+ ; add extra literals count byte + LITERALS_RUN_LEN + 15
bcc lz2gotla ; if no overflow, we got the complete count, copy
ldb ,x+ ; load low 8 bits of little-endian literals count
lda ,x+ ; load high 8 bits of literal count
bra lz2gotlt ; we now have the complete count, go copy
lz2declt lsrb ; shift literals count into place
lsrb ; (move LL from bits 3-4
lsrb ; down to bits 0-1)
lz2gotla clra ; clear A (high part of literals count)
lz2gotlt tfr x,u ; copy literals source (compressed stream ptr) to U
tfr d,x ; transfer 16-bit count into X
lz2cpylt lda ,u+ ; copy literal byte
sta ,y+ ; store it at the current output position
leax -1,x ; decrement X and update Z flag
bne lz2cpylt ; loop until all literal bytes are copied
tfr u,x ; restore compressed stream pointer past the literals
lz2nolt ldb ,s ; get token again, don't pop it from the stack
lslb ; push token's X flag bit into carry
bcs lz2replg ; if token's X bit is set, rep or large offset
lslb ; push token's Y flag bit into carry
sex ; push token's Z flag bit into reg A (carry flag is not affected)
bcs lz2offs9 ; if token's Y bit is set, 9 bits offset
bsr lz2nibl ; get offset nibble in B
lsla ; retrieve token's Z flag bit and push into carry
rolb ; shift Z flag from carry into bit 0 of B
eorb #$e1 ; set bits 5-7 of offset, reverse bit 0
sex ; set bits 8-15 of offset to $FF
bra lz2gotof ; go store the offset and resolve the backreference
lz2offs9 deca ; set bits 9-15 of offset, reverse bit 8
ldb ,x+ ; load low 8 bits of (negative, signed) offset
bra lz2gotof ; go store the offset and resolve the backreference
lz2nibct fcb $00 ; nibble ready flag
lz2nibl ldb #$aa ; $aa is a placeholder, patched with the stored nibble below
lsr <lz2nibct,pcr ; nibble ready?
bcs lz2gotnb ; yes: use the stashed low nibble loaded into B above
inc <lz2nibct,pcr ; flag nibble as ready for next time
ldb ,x+ ; load two nibbles
stb <lz2nibl+1,pcr ; store nibble for next time (low 4 bits, self-modifying)
lsrb ; shift 4 high bits of nibble down
lsrb
lsrb
lsrb
lz2gotnb andb #$0f ; only keep low 4 bits
lz2done rts ; shared return: nibble fetched, or EOD reached
lz2replg lslb ; push token's Y flag bit into carry
bcs lz2rep16 ; if token's Y bit is set, rep or 16 bit offset
sex ; push token's Z flag bit into reg A
bsr lz2nibl ; get offset nibble in B
lsla ; push token's Z flag bit into carry
rolb ; shift Z flag from carry into bit 0 of B
eorb #$e1 ; set bits 13-15 of offset, reverse bit 8
tfr b,a ; copy bits 8-15 of offset into A
suba #$02 ; subtract 512 from offset
ldb ,x+ ; load low 8 bits of (negative, signed) offset
bra lz2gotof ; go store the offset and resolve the backreference
lz2rep16 bmi lz2repof ; if token's Z flag bit is set, rep match
ldd ,x++ ; load high then low 8 bits of offset
lz2gotof std <lz2repof+2,pcr ; store match offset (self-modifies the leau below)
lz2repof leau $aaaa,y ; put backreference start address in U (dst+offset)
puls b ; restore token
clra ; clear A (high part of match length)
andb #$07 ; isolate MMM (embedded match length)
addb #$02 ; add MIN_MATCH_SIZE_V2
cmpb #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
bne lz2gotln ; no, we have the full match length, go copy
bsr lz2nibl ; get extra match length nibble in B
addb #$09 ; add MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2
cmpb #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
bne lz2gotln ; if not, we have the full match length, go copy
addb ,x+ ; add extra length byte + MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15
bcc lz2gotln ; if no overflow, we have the full length
beq lz2done ; detect EOD code
ldb ,x+ ; load 16-bit len in D (low part in B, high in A)
lda ,x+ ; (little endian)
lz2gotln pshs x ; save source compressed data pointer
tfr d,x ; copy match length to X
lz2cpymt lda ,u+ ; copy matched byte
sta ,y+ ; store it at the current output position
leax -1,x ; decrement X
bne lz2cpymt ; loop until all matched bytes are copied
puls x ; restore source compressed data pointer
lbra lz2token ; go decode next token

154
asm/6809/unlzsa2b.s Normal file
View File

@ -0,0 +1,154 @@
; unlzsa2b.s - 6809 backward decompression routine for raw LZSA2 - 174 bytes
; compress with lzsa -f2 -r -b <original_file> <compressed_file>
;
; in: x = last byte of compressed data
; y = last byte of decompression buffer
; out: y = first byte of decompressed data
;
; Copyright (C) 2020 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
decompress_lzsa2
lsr <lz2nibct,pcr ; reset nibble available flag
leax 1,x ; predecrement addressing: point one past last input byte
leay 1,y ; and one past last output byte
lz2token ldb ,-x ; load next token into B: XYZ|LL|MMM
pshs b ; save it
andb #$18 ; isolate LL (embedded literals count) in B
beq lz2nolt ; skip if no literals
cmpb #$18 ; LITERALS_RUN_LEN_V2?
bne lz2declt ; if not, we have the complete count, go unshift
bsr lz2nibl ; get extra literals length nibble in B
addb #$03 ; add LITERALS_RUN_LEN_V2
cmpb #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
bne lz2gotla ; if not, we have the full literals count, go copy
addb ,-x ; add extra literals count byte + LITERALS_RUN_LEN + 15
bcc lz2gotla ; if no overflow, we got the complete count, copy
ldd ,--x ; load 16 bit count in D (low part in B, high in A)
bra lz2gotlt ; we now have the complete count, go copy
lz2declt lsrb ; shift literals count into place
lsrb ; (move LL from bits 3-4
lsrb ; down to bits 0-1)
lz2gotla clra ; clear A (high part of literals count)
lz2gotlt tfr x,u ; copy literals source (compressed stream ptr) to U
tfr d,x ; transfer 16-bit count into X
lz2cpylt lda ,-u ; copy literal byte (walking backward)
sta ,-y ; store it, walking the output backward too
leax -1,x ; decrement X and update Z flag
bne lz2cpylt ; loop until all literal bytes are copied
tfr u,x ; restore compressed stream pointer past the literals
lz2nolt ldb ,s ; get token again, don't pop it from the stack
lslb ; push token's X flag bit into carry
bcs lz2replg ; if token's X bit is set, rep or large offset
lslb ; push token's Y flag bit into carry
sex ; push token's Z flag bit into reg A (carry flag is not affected)
bcs lz2offs9 ; if token's Y bit is set, 9 bits offset
bsr lz2nibl ; get offset nibble in B
lsla ; retrieve token's Z flag bit and push into carry
rolb ; shift Z flag from carry into bit 0 of B
eorb #$e1 ; set bits 5-7 of offset, reverse bit 0
sex ; set bits 8-15 of offset to $FF
bra lz2gotof ; go negate the offset and resolve the backreference
lz2offs9 deca ; set bits 9-15 of offset, reverse bit 8
bra lz2lowof ; go fetch the low 8 bits of the offset
lz2nibct fcb $00 ; nibble ready flag
lz2nibl ldb #$aa ; $aa is a placeholder, patched with the stored nibble below
lsr <lz2nibct,pcr ; nibble ready?
bcs lz2gotnb ; yes: use the stashed low nibble loaded into B above
inc <lz2nibct,pcr ; flag nibble as ready for next time
ldb ,-x ; load two nibbles
stb <lz2nibl+1,pcr ; store nibble for next time (low 4 bits, self-modifying)
lsrb ; shift 4 high bits of nibble down
lsrb
lsrb
lsrb
lz2gotnb andb #$0f ; only keep low 4 bits
lz2done rts ; shared return: nibble fetched, or EOD reached
lz2replg lslb ; push token's Y flag bit into carry
bcs lz2rep16 ; if token's Y bit is set, rep or 16 bit offset
sex ; push token's Z flag bit into reg A
bsr lz2nibl ; get offset nibble in B
lsla ; retrieve token's Z flag bit and push into carry
rolb ; shift Z flag from carry into bit 0 of B
eorb #$e1 ; set bits 13-15 of offset, reverse bit 8
tfr b,a ; copy bits 8-15 of offset into A
suba #$02 ; subtract 512 from offset
bra lz2lowof ; go fetch the low 8 bits of the offset
lz2rep16 bmi lz2repof ; if token's Z flag bit is set, rep match
lda ,-x ; load high 8 bits of (negative, signed) offset
lz2lowof ldb ,-x ; load low 8 bits of offset
lz2gotof nega ; reverse sign of offset in D
negb ; (negate 16-bit value: negate both bytes,
sbca #0 ; then propagate the borrow into the high byte)
std <lz2repof+2,pcr ; store match offset (self-modifies the leau below)
lz2repof leau $aaaa,y ; put backreference start address in U (dst+offset)
puls b ; restore token
clra ; clear A (high part of match length)
andb #$07 ; isolate MMM (embedded match length)
addb #$02 ; add MIN_MATCH_SIZE_V2
cmpb #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
bne lz2gotln ; no, we have the full match length, go copy
bsr lz2nibl ; get extra match length nibble in B
addb #$09 ; add MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2
cmpb #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
bne lz2gotln ; if not, we have the full match length, go copy
addb ,-x ; add extra length byte + MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15
bcc lz2gotln ; if no overflow, we have the full length
beq lz2done ; detect EOD code
ldd ,--x ; load 16-bit len in D (low part in B, high in A)
lz2gotln pshs x ; save source compressed data pointer
tfr d,x ; copy match length to X
lz2cpymt lda ,-u ; copy matched byte (walking backward)
sta ,-y ; store it, walking the output backward too
leax -1,x ; decrement X
bne lz2cpymt ; loop until all matched bytes are copied
puls x ; restore source compressed data pointer
lbra lz2token ; go decode next token

View File

@ -48,7 +48,7 @@
#define OPT_RAW_BACKWARD 8
#define OPT_STATS 16
#define TOOL_VERSION "1.2.2"
#define TOOL_VERSION "1.3.4"
/*---------------------------------------------------------------------------*/

View File

@ -157,66 +157,69 @@ static inline int lzsa_get_offset_cost_v1(const unsigned int nMatchOffset) {
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
*/
static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int nReduce) {
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << MATCHES_PER_ARRIVAL_SHIFT);
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT);
const int nMinMatchSize = pCompressor->min_match_size;
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
const int nModeSwitchPenalty = nFavorRatio ? 0 : MODESWITCH_PENALTY;
const int nDisableScore = nReduce ? 0 : (2 * BLOCK_SIZE);
int i, j, n;
if ((nEndOffset - nStartOffset) > BLOCK_SIZE) return;
memset(arrival + (nStartOffset << MATCHES_PER_ARRIVAL_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset + 1) << MATCHES_PER_ARRIVAL_SHIFT));
memset(arrival + (nStartOffset << ARRIVALS_PER_POSITION_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset + 1) << ARRIVALS_PER_POSITION_SHIFT));
arrival[nStartOffset << MATCHES_PER_ARRIVAL_SHIFT].from_slot = -1;
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].from_slot = -1;
for (i = nStartOffset; i != nEndOffset; i++) {
lzsa_arrival* cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT];
int m;
for (j = 0; j < NMATCHES_PER_ARRIVAL_V1 && arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].from_slot; j++) {
int nPrevCost = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].cost;
for (j = 0; j < NARRIVALS_PER_POSITION_V1 && cur_arrival[j].from_slot; j++) {
int nPrevCost = cur_arrival[j].cost;
int nCodingChoiceCost = nPrevCost + 8 /* literal */;
int nScore = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].score + 1;
int nNumLiterals = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].num_literals + 1;
int nScore = cur_arrival[j].score + 1;
int nNumLiterals = cur_arrival[j].num_literals + 1;
if (nNumLiterals == LITERALS_RUN_LEN_V1 || nNumLiterals == 256 || nNumLiterals == 512) {
nCodingChoiceCost += 8;
}
if (!nFavorRatio && nNumLiterals == 1)
nCodingChoiceCost += MODESWITCH_PENALTY;
if (nNumLiterals == 1)
nCodingChoiceCost += nModeSwitchPenalty;
for (n = 0; n < NMATCHES_PER_ARRIVAL_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &arrival[((i + 1) << MATCHES_PER_ARRIVAL_SHIFT) + n];
lzsa_arrival *pDestSlots = &arrival[(i + 1) << ARRIVALS_PER_POSITION_SHIFT];
for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &pDestSlots[n];
if (pDestArrival->from_slot == 0 ||
nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
memmove(&arrival[((i + 1) << MATCHES_PER_ARRIVAL_SHIFT) + n + 1],
&arrival[((i + 1) << MATCHES_PER_ARRIVAL_SHIFT) + n],
sizeof(lzsa_arrival) * (NMATCHES_PER_ARRIVAL_V1 - n - 1));
memmove(&arrival[((i + 1) << ARRIVALS_PER_POSITION_SHIFT) + n + 1],
&arrival[((i + 1) << ARRIVALS_PER_POSITION_SHIFT) + n],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1));
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->from_slot = j + 1;
pDestArrival->match_offset = 0;
pDestArrival->match_len = 0;
pDestArrival->num_literals = nNumLiterals;
pDestArrival->score = nScore;
pDestArrival->rep_offset = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].rep_offset;
pDestArrival->rep_offset = cur_arrival[j].rep_offset;
break;
}
}
}
const lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V1);
int nNumArrivalsForThisPos = j;
for (m = 0; m < NMATCHES_PER_INDEX_V1 && match[m].length; m++) {
int nMatchLen = match[m].length;
int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset);
int nStartingMatchLen, k;
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
if ((i + nMatchLen) > nEndOffset)
nMatchLen = nEndOffset - i;
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
nStartingMatchLen = nMatchLen;
@ -225,43 +228,48 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
for (j = 0; j < NMATCHES_PER_ARRIVAL_V1 && arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].from_slot; j++) {
int nPrevCost = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].cost;
lzsa_arrival *pDestSlots = &arrival[(i + k) << ARRIVALS_PER_POSITION_SHIFT];
for (j = 0; j < nNumArrivalsForThisPos; j++) {
int nPrevCost = cur_arrival[j].cost;
int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
int nScore = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].score + 5;
int exists = 0;
if (!nFavorRatio && !arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].num_literals)
nCodingChoiceCost += MODESWITCH_PENALTY;
if (!cur_arrival[j].num_literals)
nCodingChoiceCost += nModeSwitchPenalty;
for (n = 0;
n < NMATCHES_PER_ARRIVAL_V1 && arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n].from_slot && arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n].cost <= nCodingChoiceCost;
n < NARRIVALS_PER_POSITION_V1 && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost;
n++) {
if (lzsa_get_offset_cost_v1(arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n].rep_offset) == lzsa_get_offset_cost_v1(match[m].offset)) {
if (lzsa_get_offset_cost_v1(pDestSlots[n].rep_offset) == nMatchOffsetCost) {
exists = 1;
break;
}
}
for (n = 0; !exists && n < NMATCHES_PER_ARRIVAL_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n];
if (!exists) {
int nScore = cur_arrival[j].score + 5;
if (pDestArrival->from_slot == 0 ||
nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
memmove(&arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n + 1],
&arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n],
sizeof(lzsa_arrival) * (NMATCHES_PER_ARRIVAL_V1 - n - 1));
for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &pDestSlots[n];
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->from_slot = j + 1;
pDestArrival->match_offset = match[m].offset;
pDestArrival->match_len = k;
pDestArrival->num_literals = 0;
pDestArrival->score = nScore;
pDestArrival->rep_offset = match[m].offset;
break;
if (pDestArrival->from_slot == 0 ||
nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
memmove(&pDestSlots[n + 1],
&pDestSlots[n],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1));
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->from_slot = j + 1;
pDestArrival->match_len = k;
pDestArrival->num_literals = 0;
pDestArrival->score = nScore;
pDestArrival->rep_offset = match[m].offset;
j = NARRIVALS_PER_POSITION_V1;
break;
}
}
}
}
@ -269,14 +277,17 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
}
}
lzsa_arrival *end_arrival = &arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + 0];
lzsa_arrival *end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT) + 0];
while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0) {
if (end_arrival->from_pos >= nEndOffset) return;
pBestMatch[end_arrival->from_pos].length = end_arrival->match_len;
pBestMatch[end_arrival->from_pos].offset = end_arrival->match_offset;
if (end_arrival->match_len)
pBestMatch[end_arrival->from_pos].offset = end_arrival->rep_offset;
else
pBestMatch[end_arrival->from_pos].offset = 0;
end_arrival = &arrival[(end_arrival->from_pos << MATCHES_PER_ARRIVAL_SHIFT) + (end_arrival->from_slot - 1)];
end_arrival = &arrival[(end_arrival->from_pos << ARRIVALS_PER_POSITION_SHIFT) + (end_arrival->from_slot - 1)];
}
}
@ -301,12 +312,12 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un
lzsa_match *pMatch = pBestMatch + i;
if (pMatch->length == 0 &&
(i + 1) < (nEndOffset - LAST_LITERALS) &&
(i + 1) < nEndOffset &&
pBestMatch[i + 1].length >= MIN_MATCH_SIZE_V1 &&
pBestMatch[i + 1].length < MAX_VARLEN &&
pBestMatch[i + 1].offset &&
i >= pBestMatch[i + 1].offset &&
(i + pBestMatch[i + 1].length + 1) <= (nEndOffset - LAST_LITERALS) &&
(i + pBestMatch[i + 1].length + 1) <= nEndOffset &&
!memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
int nCurLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V1);
int nReducedLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V1);

File diff suppressed because it is too large Load Diff

View File

@ -62,6 +62,9 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->best_match = NULL;
pCompressor->improved_match = NULL;
pCompressor->arrival = NULL;
pCompressor->rep_handled_mask = NULL;
pCompressor->first_offset_for_byte = NULL;
pCompressor->next_offset_for_pos = NULL;
pCompressor->min_match_size = nMinMatchSize;
if (pCompressor->min_match_size < nMinMatchSizeForFormat)
pCompressor->min_match_size = nMinMatchSizeForFormat;
@ -89,7 +92,7 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->open_intervals = (unsigned int *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned int));
if (pCompressor->open_intervals) {
pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << MATCHES_PER_ARRIVAL_SHIFT) * sizeof(lzsa_arrival));
pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << ARRIVALS_PER_POSITION_SHIFT) * sizeof(lzsa_arrival));
if (pCompressor->arrival) {
pCompressor->best_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match));
@ -102,8 +105,23 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->match = (lzsa_match *)malloc(BLOCK_SIZE * NMATCHES_PER_INDEX_V2 * sizeof(lzsa_match));
else
pCompressor->match = (lzsa_match *)malloc(BLOCK_SIZE * NMATCHES_PER_INDEX_V1 * sizeof(lzsa_match));
if (pCompressor->match)
return 0;
if (pCompressor->match) {
if (pCompressor->format_version == 2) {
pCompressor->rep_handled_mask = (char*)malloc(NARRIVALS_PER_POSITION_V2_BIG * ((LCP_MAX + 1) / 8) * sizeof(char));
if (pCompressor->rep_handled_mask) {
pCompressor->first_offset_for_byte = (int*)malloc(65536 * sizeof(int));
if (pCompressor->first_offset_for_byte) {
pCompressor->next_offset_for_pos = (int*)malloc(BLOCK_SIZE * sizeof(int));
if (pCompressor->next_offset_for_pos) {
return 0;
}
}
}
}
else {
return 0;
}
}
}
}
}
@ -124,6 +142,21 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
divsufsort_destroy(&pCompressor->divsufsort_context);
if (pCompressor->next_offset_for_pos) {
free(pCompressor->next_offset_for_pos);
pCompressor->next_offset_for_pos = NULL;
}
if (pCompressor->first_offset_for_byte) {
free(pCompressor->first_offset_for_byte);
pCompressor->first_offset_for_byte = NULL;
}
if (pCompressor->rep_handled_mask) {
free(pCompressor->rep_handled_mask);
pCompressor->rep_handled_mask = NULL;
}
if (pCompressor->match) {
free(pCompressor->match);
pCompressor->match = NULL;

View File

@ -49,10 +49,10 @@ extern "C" {
#define VISITED_FLAG 0x80000000
#define EXCL_VISITED_MASK 0x7fffffff
#define NMATCHES_PER_ARRIVAL_V1 8
#define NMATCHES_PER_ARRIVAL_V2_SMALL 9
#define NMATCHES_PER_ARRIVAL_V2_BIG 32
#define MATCHES_PER_ARRIVAL_SHIFT 5
#define NARRIVALS_PER_POSITION_V1 8
#define NARRIVALS_PER_POSITION_V2_SMALL 9
#define NARRIVALS_PER_POSITION_V2_BIG 32
#define ARRIVALS_PER_POSITION_SHIFT 5
#define NMATCHES_PER_INDEX_V1 8
#define MATCHES_PER_INDEX_SHIFT_V1 3
@ -63,8 +63,6 @@ extern "C" {
#define LEAVE_ALONE_MATCH_SIZE 300
#define LEAVE_ALONE_MATCH_SIZE_SMALL 1000
#define LAST_LITERALS 0
#define MODESWITCH_PENALTY 3
/** One match */
@ -81,12 +79,10 @@ typedef struct {
int from_pos;
unsigned short rep_len;
unsigned short match_len;
int rep_pos;
int num_literals;
int score;
unsigned short match_offset;
unsigned short match_len;
} lzsa_arrival;
/** Compression statistics */
@ -128,6 +124,9 @@ typedef struct _lzsa_compressor {
lzsa_match *best_match;
lzsa_match *improved_match;
lzsa_arrival *arrival;
char *rep_handled_mask;
int *first_offset_for_byte;
int *next_offset_for_pos;
int min_match_size;
int format_version;
int flags;