44 Commits
1.3.0 ... 1.3.4

Author SHA1 Message Date
01228f3eeb Bump version 2020-08-18 11:57:25 +02:00
e3fd315541 Faster LZSA1 compression 2020-08-18 11:51:24 +02:00
5a0da16874 Increase LZSA2 ratio for some input files 2020-08-18 09:13:54 +02:00
028007b57c Bump version 2020-08-02 09:40:31 +02:00
4682b2e917 Small simplification 2020-07-29 15:23:22 +02:00
060f5d3350 Simplify code, compress LZSA2 another 15% faster 2020-07-29 13:01:24 +02:00
33eec56b9b Bump version 2020-07-27 15:36:01 +02:00
90fa770458 Compress another 8% faster 2020-07-27 13:25:16 +02:00
b2971da2b4 Nicer code 2020-07-26 16:38:22 +02:00
3fb9dc54b1 Compress LZSA2 another 3% faster 2020-07-26 10:07:03 +02:00
00d1d95625 Small improvement 2020-07-24 19:18:46 +02:00
e4f013f2db Compress LZSA2 17% faster 2020-07-24 17:14:01 +02:00
703ff19a3a Bump version 2020-07-14 23:50:44 +02:00
fc5081fb1a Rename confusing definitions 2020-07-14 22:36:38 +02:00
61698b5036 Another LZSA2 compression speedup 2020-07-14 17:01:07 +02:00
cf49af5cda Faster LZSA2 compression 2020-07-14 12:36:56 +02:00
c39158eea8 Compress LZSA2 faster, clean code up 2020-07-13 19:34:07 +02:00
4864f3c184 Compress LZSA1 a little faster 2020-07-10 17:45:13 +02:00
8ed768aafc Nicer and faster code 2020-07-10 08:55:45 +02:00
9c7495f458 Compress LZSA2 ~12% faster 2020-07-06 12:47:56 +02:00
afbb1de16c Merge pull request #48 from dougmasten/dev
More optimizations to the 6809 LZSA depackers
2020-06-28 15:46:17 +02:00
078edef880 Optimize match offset code in 6809 LZSA2 depacker 2020-06-27 04:17:05 -05:00
03692fca2c Update code byte counts for 6809 LZSA depackers 2020-06-27 02:02:33 -05:00
39e11422ec Delay clearing high part of literals count until we really have to in 6809 LZSA1 depacker 2020-06-27 02:02:33 -05:00
fde15d3fb0 Move instruction before branch to save one byte in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
fc8120f0da Optimize handling of 9 bits offset in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
c7b3ffc067 Delay clearing high part of literals count until we really have to in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
137c6201be One byte saving for setting reg A to $FF in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
e397428c1f Remove trailing whitespaces in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
b8cfbbbc7b Optimize handling of token's Z flag bit in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
28ca829924 delay clearing high part of literals count until we really have to in 6809 LZSA1 depacker 2020-06-27 02:02:33 -05:00
27562f4761 Restructure code to eliminate one BRA instruction from loop in 6809 LZSA1 depacker 2020-06-27 02:02:33 -05:00
0307d228a0 Merge pull request #3 from dougmasten/master
Merge pull request
2020-06-26 17:13:02 -05:00
e8b2ebb89f Merge pull request #2 from emmanuel-marty/master
Merge pull request
2020-06-26 17:08:44 -05:00
f72133f4cf Move instruction before branch to save one byte in 6809 LZSA2 depacker 2020-06-26 16:48:58 -05:00
56ba563794 One byte saving for setting reg A to $FF in 6809 LZSA2 depacker 2020-06-26 15:59:24 -05:00
c0f09db364 Delay clearing high part of literals count until we really have to in 6809 LZSA2 depacker 2020-06-26 15:56:28 -05:00
99db30a732 Optimize handling of 9 bits offset in 6809 LZSA2 depacker 2020-06-26 15:18:53 -05:00
061ca99838 Optimize handling of token's Z flag bit in 6809 LZSA2 depacker 2020-06-26 14:52:09 -05:00
7b96368469 Optimize match offset code in 6809 LZSA2 depacker 2020-06-26 14:42:36 -05:00
e9540b2e3d Remove unnecessary "ADDB #$18" as register B will always have this value from 6809 LZSA2 depacker 2020-06-24 00:49:09 -05:00
6a47ed7f41 Remove unnecessary "ADDB #$18" as B register will always have this value from 6809 LZSA depacker 2020-06-21 12:32:54 -05:00
06d63de9d7 Rearrange "CLRA" instruction for slight speed optimization in 6809 LZSA depacker 2020-06-21 12:16:07 -05:00
b5b8ca556a Rearrange match offset code to save 2 bytes in 6809 LZSA depacker 2020-06-21 01:09:15 -05:00
9 changed files with 611 additions and 438 deletions

View File

@ -1,4 +1,4 @@
; unlzsa1.s - 6809 decompression routine for raw LZSA1 - 111 bytes
; unlzsa1.s - 6809 decompression routine for raw LZSA1 - 110 bytes
; compress with lzsa -r <original_file> <compressed_file>
;
; in: x = start of compressed data
@ -25,8 +25,7 @@
decompress_lzsa1 equ lz1token
lz1bigof ldb ,x+ ; O set: load long 16 bit (negative, signed) offset
lda ,x+ ; (little endian)
lz1bigof lda ,x+ ; O set: load MSB 16-bit (negative, signed) offset
lz1gotof leau d,y ; put backreference start address in U (dst+offset)
puls b ; restore token
@ -65,7 +64,6 @@ lz1cpymt lda ,u+ ; copy matched byte
lz1token ldb ,x+ ; load next token into B: O|LLL|MMMM
pshs b ; save it
clra ; clear A (high part of literals count)
andb #$70 ; isolate LLL (embedded literals count) in B
beq lz1nolt ; skip if no literals
cmpb #$70 ; LITERALS_RUN_LEN?
@ -73,7 +71,7 @@ lz1token ldb ,x+ ; load next token into B: O|LLL|MMMM
ldb ,x+ ; load extra literals count byte
addb #$07 ; add LITERALS_RUN_LEN
bcc lz1gotlt ; if no overflow, we got the complete count, copy
bcc lz1gotla ; if no overflow, we got the complete count, copy
bne lz1midlt
ldb ,x+ ; load low 8 bits of little-endian literals count
@ -88,6 +86,7 @@ lz1declt lsrb ; shift literals count into place
lsrb
lsrb
lsrb
lz1gotla clra ; clear A (high part of literals count)
lz1gotlt tfr x,u
tfr d,x ; transfer 16-bit count into X
@ -97,9 +96,9 @@ lz1cpylt lda ,u+ ; copy literal byte
bne lz1cpylt ; loop until all literal bytes are copied
tfr u,x
lz1nolt ldb ,s ; get token again, don't pop it from the stack
lz1nolt ldb ,x+ ; load either 8-bit or LSB 16-bit offset (negative, signed)
lda ,s ; get token again, don't pop it from the stack
bmi lz1bigof ; test O bit (small or large offset)
ldb ,x+ ; O clear: load 8 bit (negative, signed) offset
lda #$ff ; set high 8 bits
bra lz1gotof

View File

@ -1,4 +1,4 @@
; unlzsa1b.s - 6809 backward decompression routine for raw LZSA1 - 112 bytes
; unlzsa1b.s - 6809 backward decompression routine for raw LZSA1 - 113 bytes
; compress with lzsa -r -b <original_file> <compressed_file>
;
; in: x = last byte of compressed data
@ -26,11 +26,47 @@
decompress_lzsa1
leax 1,x
leay 1,y
bra lz1token
lz1bigof ldd ,--x ; O set: load long 16 bit (negative, signed) offset
lz1gotof nega ; reverse sign of offset in D
negb
sbca #0
leau d,y ; put backreference start address in U (dst+offset)
puls b ; restore token
clra ; clear A (high part of match length)
andb #$0F ; isolate MMMM (embedded match length)
addb #$03 ; add MIN_MATCH_SIZE
cmpb #$12 ; MATCH_RUN_LEN?
bne lz1gotln ; no, we have the full match length, go copy
addb ,-x ; add extra match length byte + MIN_MATCH_SIZE + MATCH_RUN_LEN
bcc lz1gotln ; if no overflow, we have the full length
bne lz1midln
ldd ,--x ; load 16-bit len in D (low part in B, high in A)
bne lz1gotln ; check if we hit EOD (16-bit length = 0)
rts ; done, bail
lz1midln tfr b,a ; copy high part of len into A
ldb ,-x ; grab low 8 bits of len in B
lz1gotln pshs x ; save source compressed data pointer
tfr d,x ; copy match length to X
lz1cpymt lda ,-u ; copy matched byte
sta ,-y
leax -1,x ; decrement X
bne lz1cpymt ; loop until all matched bytes are copied
puls x ; restore source compressed data pointer
lz1token ldb ,-x ; load next token into B: O|LLL|MMMM
pshs b ; save it
clra ; clear A (high part of literals count)
andb #$70 ; isolate LLL (embedded literals count) in B
beq lz1nolt ; skip if no literals
cmpb #$70 ; LITERALS_RUN_LEN?
@ -38,7 +74,7 @@ lz1token ldb ,-x ; load next token into B: O|LLL|MMMM
ldb ,-x ; load extra literals count byte
addb #$07 ; add LITERALS_RUN_LEN
bcc lz1gotlt ; if no overflow, we got the complete count, copy
bcc lz1gotla ; if no overflow, we got the complete count, copy
bne lz1midlt
ldd ,--x ; load 16 bit count in D (low part in B, high in A)
@ -52,11 +88,12 @@ lz1declt lsrb ; shift literals count into place
lsrb
lsrb
lsrb
lz1gotla clra ; clear A (high part of literals count)
lz1gotlt tfr x,u
tfr d,x ; transfer 16-bit count into X
lz1cpylt lda ,-u ; copy literal byte
sta ,-y
sta ,-y
leax -1,x ; decrement X and update Z flag
bne lz1cpylt ; loop until all literal bytes are copied
tfr u,x
@ -67,40 +104,3 @@ lz1nolt ldb ,s ; get token again, don't pop it from the stack
ldb ,-x ; O clear: load 8 bit (negative, signed) offset
lda #$ff ; set high 8 bits
bra lz1gotof
lz1bigof ldd ,--x ; O set: load long 16 bit (negative, signed) offset
lz1gotof nega ; reverse sign of offset in D
negb
sbca #0
leau d,y ; put backreference start address in U (dst+offset)
puls b ; restore token
clra ; clear A (high part of match length)
andb #$0F ; isolate MMMM (embedded match length)
addb #$03 ; add MIN_MATCH_SIZE
cmpb #$12 ; MATCH_RUN_LEN?
bne lz1gotln ; no, we have the full match length, go copy
addb ,-x ; add extra match length byte + MIN_MATCH_SIZE + MATCH_RUN_LEN
bcc lz1gotln ; if no overflow, we have the full length
bne lz1midln
ldd ,--x ; load 16-bit len in D (low part in B, high in A)
bne lz1gotln ; check if we hit EOD (16-bit length = 0)
rts ; done, bail
lz1midln tfr b,a ; copy high part of len into A
ldb ,-x ; grab low 8 bits of len in B
lz1gotln pshs x ; save source compressed data pointer
tfr d,x ; copy match length to X
lz1cpymt lda ,-u ; copy matched byte
sta ,-y
leax -1,x ; decrement X
bne lz1cpymt ; loop until all matched bytes are copied
puls x ; restore source compressed data pointer
bra lz1token ; go decode next token

View File

@ -1,4 +1,4 @@
; unlzsa2.s - 6809 decompression routine for raw LZSA2 - 183 bytes
; unlzsa2.s - 6809 decompression routine for raw LZSA2 - 172 bytes
; compress with lzsa -f2 -r <original_file> <compressed_file>
;
; in: x = start of compressed data
@ -29,8 +29,7 @@ decompress_lzsa2
lz2token ldb ,x+ ; load next token into B: XYZ|LL|MMM
pshs b ; save it
clra ; clear A (high part of literals count)
andb #$18 ; isolate LLL (embedded literals count) in B
andb #$18 ; isolate LL (embedded literals count) in B
beq lz2nolt ; skip if no literals
cmpb #$18 ; LITERALS_RUN_LEN_V2?
bne lz2declt ; if not, we have the complete count, go unshift
@ -38,10 +37,10 @@ lz2token ldb ,x+ ; load next token into B: XYZ|LL|MMM
bsr lz2nibl ; get extra literals length nibble in B
addb #$03 ; add LITERALS_RUN_LEN_V2
cmpb #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
bne lz2gotlt ; if not, we have the full literals count, go copy
bne lz2gotla ; if not, we have the full literals count, go copy
addb ,x+ ; add extra literals count byte + LITERALS_RUN_LEN + 15
bcc lz2gotlt ; if no overflow, we got the complete count, copy
bcc lz2gotla ; if no overflow, we got the complete count, copy
ldb ,x+ ; load low 8 bits of little-endian literals count
lda ,x+ ; load high 8 bits of literal count
@ -50,11 +49,12 @@ lz2token ldb ,x+ ; load next token into B: XYZ|LL|MMM
lz2declt lsrb ; shift literals count into place
lsrb
lsrb
lz2gotla clra ; clear A (high part of literals count)
lz2gotlt tfr x,u
tfr d,x ; transfer 16-bit count into X
lz2cpylt lda ,u+ ; copy literal byte
sta ,y+
sta ,y+
leax -1,x ; decrement X and update Z flag
bne lz2cpylt ; loop until all literal bytes are copied
tfr u,x
@ -65,23 +65,18 @@ lz2nolt ldb ,s ; get token again, don't pop it from the stack
bcs lz2replg ; if token's X bit is set, rep or large offset
lslb ; push token's Y flag bit into carry
sex ; push token's Z flag bit into reg A (carry flag is not affected)
bcs lz2offs9 ; if token's Y bit is set, 9 bits offset
lslb ; push token's Z flag bit into carry
tfr cc,a ; preserve cpu flags (to preserve carry)
bsr lz2nibl ; get offset nibble in B
tfr a,cc ; restore cpu flags
lsla ; retrieve token's Z flag bit and push into carry
rolb ; shift Z flag from carry into bit 0 of B
eorb #$e1 ; set bits 5-7 of offset, reverse bit 0
lda #$ff ; set bits 8-15 of offset
sex ; set bits 8-15 of offset to $FF
bra lz2gotof
lz2offs9 clra ; clear A (to prepare for high 8 bits of offset)
lslb ; push token's Z flag bit into carry
rola ; shift Z flag from carry into bit 0 of A
coma ; set bits 9-15 of offset, reverse bit 8
lz2offs9 deca ; set bits 9-15 of offset, reverse bit 8
ldb ,x+ ; load low 8 bits of (negative, signed) offset
bra lz2gotof
@ -106,30 +101,24 @@ lz2done rts
lz2replg lslb ; push token's Y flag bit into carry
bcs lz2rep16 ; if token's Y bit is set, rep or 16 bit offset
lslb ; push token's Z flag bit into carry
tfr cc,a ; preserve cpu flags (to preserve carry)
sex ; push token's Z flag bit into reg A
bsr lz2nibl ; get offset nibble in B
tfr a,cc ; restore cpu flags
lsla ; push token's Z flag bit into carry
rolb ; shift Z flag from carry into bit 0 of B
eorb #$e1 ; set bits 13-15 of offset, reverse bit 8
tfr b,a ; copy bits 8-15 of offset into A
suba #$02 ; subtract 512 from offset
ldb ,x+ ; load low 8 bits of (negative, signed) offset
bra lz2gotof
lz2rep16 bmi lz2repof ; if token's Z flag bit is set, rep match
ldd ,x++ ; load high then low 8 bits of offset
lz2gotof std <lz2repof+1,pcr ; store match offset
lz2gotof std <lz2repof+2,pcr ; store match offset
lz2repof leau $aaaa,y ; put backreference start address in U (dst+offset)
lz2repof ldd #$aaaa ; load match offset
leau d,y ; put backreference start address in U (dst+offset)
puls b ; restore token
clra ; clear A (high part of match length)
andb #$07 ; isolate MMM (embedded match length)
addb #$02 ; add MIN_MATCH_SIZE_V2
@ -141,8 +130,7 @@ lz2repof ldd #$aaaa ; load match offset
cmpb #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
bne lz2gotln ; if not, we have the full match length, go copy
ldb ,x+ ; load extra length byte
addb #$18 ; add MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15
addb ,x+ ; add extra length byte + MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15
bcc lz2gotln ; if no overflow, we have the full length
beq lz2done ; detect EOD code
@ -153,10 +141,9 @@ lz2gotln pshs x ; save source compressed data pointer
tfr d,x ; copy match length to X
lz2cpymt lda ,u+ ; copy matched byte
sta ,y+
sta ,y+
leax -1,x ; decrement X
bne lz2cpymt ; loop until all matched bytes are copied
puls x ; restore source compressed data pointer
lbra lz2token ; go decode next token

View File

@ -1,4 +1,4 @@
; unlzsa2b.s - 6809 backward decompression routine for raw LZSA2 - 187 bytes
; unlzsa2b.s - 6809 backward decompression routine for raw LZSA2 - 174 bytes
; compress with lzsa -f2 -r -b <original_file> <compressed_file>
;
; in: x = last byte of compressed data
@ -31,7 +31,6 @@ decompress_lzsa2
lz2token ldb ,-x ; load next token into B: XYZ|LL|MMM
pshs b ; save it
clra ; clear A (high part of literals count)
andb #$18 ; isolate LLL (embedded literals count) in B
beq lz2nolt ; skip if no literals
cmpb #$18 ; LITERALS_RUN_LEN_V2?
@ -40,10 +39,10 @@ lz2token ldb ,-x ; load next token into B: XYZ|LL|MMM
bsr lz2nibl ; get extra literals length nibble in B
addb #$03 ; add LITERALS_RUN_LEN_V2
cmpb #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
bne lz2gotlt ; if not, we have the full literals count, go copy
bne lz2gotla ; if not, we have the full literals count, go copy
addb ,-x ; add extra literals count byte + LITERALS_RUN_LEN + 15
bcc lz2gotlt ; if no overflow, we got the complete count, copy
bcc lz2gotla ; if no overflow, we got the complete count, copy
ldd ,--x ; load 16 bit count in D (low part in B, high in A)
bra lz2gotlt ; we now have the complete count, go copy
@ -51,11 +50,12 @@ lz2token ldb ,-x ; load next token into B: XYZ|LL|MMM
lz2declt lsrb ; shift literals count into place
lsrb
lsrb
lz2gotla clra ; clear A (high part of literals count)
lz2gotlt tfr x,u
tfr d,x ; transfer 16-bit count into X
lz2cpylt lda ,-u ; copy literal byte
sta ,-y
sta ,-y
leax -1,x ; decrement X and update Z flag
bne lz2cpylt ; loop until all literal bytes are copied
tfr u,x
@ -66,22 +66,18 @@ lz2nolt ldb ,s ; get token again, don't pop it from the stack
bcs lz2replg ; if token's X bit is set, rep or large offset
lslb ; push token's Y flag bit into carry
sex ; push token's Z flag bit into reg A (carry flag is not affected)
bcs lz2offs9 ; if token's Y bit is set, 9 bits offset
lslb ; push token's Z flag bit into carry
tfr cc,a ; preserve cpu flags (to preserve carry)
bsr lz2nibl ; get offset nibble in B
tfr a,cc ; restore cpu flags
lsla ; retrieve token's Z flag bit and push into carry
rolb ; shift Z flag from carry into bit 0 of B
eorb #$e1 ; set bits 5-7 of offset, reverse bit 0
lda #$ff ; set bits 8-15 of offset
sex ; set bits 8-15 of offset to $FF
bra lz2gotof
lz2offs9 clra ; clear A (to prepare for high 8 bits of offset)
lslb ; push token's Z flag bit into carry
rola ; shift Z flag from carry into bit 0 of A
coma ; set bits 9-15 of offset, reverse bit 8
lz2offs9 deca ; set bits 9-15 of offset, reverse bit 8
bra lz2lowof
lz2nibct fcb $00 ; nibble ready flag
@ -105,10 +101,9 @@ lz2done rts
lz2replg lslb ; push token's Y flag bit into carry
bcs lz2rep16 ; if token's Y bit is set, rep or 16 bit offset
lslb ; push token's Z flag bit into carry
tfr cc,a ; preserve cpu flags (to preserve carry)
sex ; push token's Z flag bit into reg A
bsr lz2nibl ; get offset nibble in B
tfr a,cc ; restore cpu flags
lsla ; retrieve token's Z flag bit and push into carry
rolb ; shift Z flag from carry into bit 0 of B
eorb #$e1 ; set bits 13-15 of offset, reverse bit 8
@ -117,20 +112,19 @@ lz2replg lslb ; push token's Y flag bit into carry
bra lz2lowof
lz2rep16 bmi lz2repof ; if token's Z flag bit is set, rep match
lda ,-x ; load high 8 bits of (negative, signed) offset
lz2lowof ldb ,-x ; load low 8 bits of offset
lz2gotof nega ; reverse sign of offset in D
negb
sbca #0
std <lz2repof+1,pcr ; store match offset
std <lz2repof+2,pcr ; store match offset
lz2repof leau $aaaa,y ; put backreference start address in U (dst+offset)
lz2repof ldd #$aaaa ; load match offset
leau d,y ; put backreference start address in U (dst+offset)
puls b ; restore token
clra ; clear A (high part of match length)
andb #$07 ; isolate MMM (embedded match length)
addb #$02 ; add MIN_MATCH_SIZE_V2
@ -142,8 +136,7 @@ lz2repof ldd #$aaaa ; load match offset
cmpb #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
bne lz2gotln ; if not, we have the full match length, go copy
ldb ,-x ; load extra length byte
addb #$18 ; add MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15
addb ,-x ; add extra length byte + MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15
bcc lz2gotln ; if no overflow, we have the full length
beq lz2done ; detect EOD code
@ -153,10 +146,9 @@ lz2gotln pshs x ; save source compressed data pointer
tfr d,x ; copy match length to X
lz2cpymt lda ,-u ; copy matched byte
sta ,-y
sta ,-y
leax -1,x ; decrement X
bne lz2cpymt ; loop until all matched bytes are copied
puls x ; restore source compressed data pointer
lbra lz2token ; go decode next token

View File

@ -48,7 +48,7 @@
#define OPT_RAW_BACKWARD 8
#define OPT_STATS 16
#define TOOL_VERSION "1.3.0"
#define TOOL_VERSION "1.3.4"
/*---------------------------------------------------------------------------*/

View File

@ -157,66 +157,69 @@ static inline int lzsa_get_offset_cost_v1(const unsigned int nMatchOffset) {
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
*/
static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int nReduce) {
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << MATCHES_PER_ARRIVAL_SHIFT);
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT);
const int nMinMatchSize = pCompressor->min_match_size;
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
const int nModeSwitchPenalty = nFavorRatio ? 0 : MODESWITCH_PENALTY;
const int nDisableScore = nReduce ? 0 : (2 * BLOCK_SIZE);
int i, j, n;
if ((nEndOffset - nStartOffset) > BLOCK_SIZE) return;
memset(arrival + (nStartOffset << MATCHES_PER_ARRIVAL_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset + 1) << MATCHES_PER_ARRIVAL_SHIFT));
memset(arrival + (nStartOffset << ARRIVALS_PER_POSITION_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset + 1) << ARRIVALS_PER_POSITION_SHIFT));
arrival[nStartOffset << MATCHES_PER_ARRIVAL_SHIFT].from_slot = -1;
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].from_slot = -1;
for (i = nStartOffset; i != nEndOffset; i++) {
lzsa_arrival* cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT];
int m;
for (j = 0; j < NMATCHES_PER_ARRIVAL_V1 && arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].from_slot; j++) {
int nPrevCost = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].cost;
for (j = 0; j < NARRIVALS_PER_POSITION_V1 && cur_arrival[j].from_slot; j++) {
int nPrevCost = cur_arrival[j].cost;
int nCodingChoiceCost = nPrevCost + 8 /* literal */;
int nScore = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].score + 1;
int nNumLiterals = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].num_literals + 1;
int nScore = cur_arrival[j].score + 1;
int nNumLiterals = cur_arrival[j].num_literals + 1;
if (nNumLiterals == LITERALS_RUN_LEN_V1 || nNumLiterals == 256 || nNumLiterals == 512) {
nCodingChoiceCost += 8;
}
if (!nFavorRatio && nNumLiterals == 1)
nCodingChoiceCost += MODESWITCH_PENALTY;
if (nNumLiterals == 1)
nCodingChoiceCost += nModeSwitchPenalty;
for (n = 0; n < NMATCHES_PER_ARRIVAL_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &arrival[((i + 1) << MATCHES_PER_ARRIVAL_SHIFT) + n];
lzsa_arrival *pDestSlots = &arrival[(i + 1) << ARRIVALS_PER_POSITION_SHIFT];
for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &pDestSlots[n];
if (pDestArrival->from_slot == 0 ||
nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
memmove(&arrival[((i + 1) << MATCHES_PER_ARRIVAL_SHIFT) + n + 1],
&arrival[((i + 1) << MATCHES_PER_ARRIVAL_SHIFT) + n],
sizeof(lzsa_arrival) * (NMATCHES_PER_ARRIVAL_V1 - n - 1));
memmove(&arrival[((i + 1) << ARRIVALS_PER_POSITION_SHIFT) + n + 1],
&arrival[((i + 1) << ARRIVALS_PER_POSITION_SHIFT) + n],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1));
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->from_slot = j + 1;
pDestArrival->match_offset = 0;
pDestArrival->match_len = 0;
pDestArrival->num_literals = nNumLiterals;
pDestArrival->score = nScore;
pDestArrival->rep_offset = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].rep_offset;
pDestArrival->rep_offset = cur_arrival[j].rep_offset;
break;
}
}
}
const lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V1);
int nNumArrivalsForThisPos = j;
for (m = 0; m < NMATCHES_PER_INDEX_V1 && match[m].length; m++) {
int nMatchLen = match[m].length;
int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset);
int nStartingMatchLen, k;
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
if ((i + nMatchLen) > nEndOffset)
nMatchLen = nEndOffset - i;
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
nStartingMatchLen = nMatchLen;
@ -225,43 +228,48 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
for (j = 0; j < NMATCHES_PER_ARRIVAL_V1 && arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].from_slot; j++) {
int nPrevCost = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].cost;
lzsa_arrival *pDestSlots = &arrival[(i + k) << ARRIVALS_PER_POSITION_SHIFT];
for (j = 0; j < nNumArrivalsForThisPos; j++) {
int nPrevCost = cur_arrival[j].cost;
int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
int nScore = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].score + 5;
int exists = 0;
if (!nFavorRatio && !arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].num_literals)
nCodingChoiceCost += MODESWITCH_PENALTY;
if (!cur_arrival[j].num_literals)
nCodingChoiceCost += nModeSwitchPenalty;
for (n = 0;
n < NMATCHES_PER_ARRIVAL_V1 && arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n].from_slot && arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n].cost <= nCodingChoiceCost;
n < NARRIVALS_PER_POSITION_V1 && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost;
n++) {
if (lzsa_get_offset_cost_v1(arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n].rep_offset) == lzsa_get_offset_cost_v1(match[m].offset)) {
if (lzsa_get_offset_cost_v1(pDestSlots[n].rep_offset) == nMatchOffsetCost) {
exists = 1;
break;
}
}
for (n = 0; !exists && n < NMATCHES_PER_ARRIVAL_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n];
if (!exists) {
int nScore = cur_arrival[j].score + 5;
if (pDestArrival->from_slot == 0 ||
nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
memmove(&arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n + 1],
&arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n],
sizeof(lzsa_arrival) * (NMATCHES_PER_ARRIVAL_V1 - n - 1));
for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &pDestSlots[n];
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->from_slot = j + 1;
pDestArrival->match_offset = match[m].offset;
pDestArrival->match_len = k;
pDestArrival->num_literals = 0;
pDestArrival->score = nScore;
pDestArrival->rep_offset = match[m].offset;
break;
if (pDestArrival->from_slot == 0 ||
nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
memmove(&pDestSlots[n + 1],
&pDestSlots[n],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1));
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->from_slot = j + 1;
pDestArrival->match_len = k;
pDestArrival->num_literals = 0;
pDestArrival->score = nScore;
pDestArrival->rep_offset = match[m].offset;
j = NARRIVALS_PER_POSITION_V1;
break;
}
}
}
}
@ -269,14 +277,17 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
}
}
lzsa_arrival *end_arrival = &arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + 0];
lzsa_arrival *end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT) + 0];
while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0) {
if (end_arrival->from_pos >= nEndOffset) return;
pBestMatch[end_arrival->from_pos].length = end_arrival->match_len;
pBestMatch[end_arrival->from_pos].offset = end_arrival->match_offset;
if (end_arrival->match_len)
pBestMatch[end_arrival->from_pos].offset = end_arrival->rep_offset;
else
pBestMatch[end_arrival->from_pos].offset = 0;
end_arrival = &arrival[(end_arrival->from_pos << MATCHES_PER_ARRIVAL_SHIFT) + (end_arrival->from_slot - 1)];
end_arrival = &arrival[(end_arrival->from_pos << ARRIVALS_PER_POSITION_SHIFT) + (end_arrival->from_slot - 1)];
}
}
@ -301,12 +312,12 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un
lzsa_match *pMatch = pBestMatch + i;
if (pMatch->length == 0 &&
(i + 1) < (nEndOffset - LAST_LITERALS) &&
(i + 1) < nEndOffset &&
pBestMatch[i + 1].length >= MIN_MATCH_SIZE_V1 &&
pBestMatch[i + 1].length < MAX_VARLEN &&
pBestMatch[i + 1].offset &&
i >= pBestMatch[i + 1].offset &&
(i + pBestMatch[i + 1].length + 1) <= (nEndOffset - LAST_LITERALS) &&
(i + pBestMatch[i + 1].length + 1) <= nEndOffset &&
!memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
int nCurLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V1);
int nReducedLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V1);

File diff suppressed because it is too large Load Diff

View File

@ -62,6 +62,9 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->best_match = NULL;
pCompressor->improved_match = NULL;
pCompressor->arrival = NULL;
pCompressor->rep_handled_mask = NULL;
pCompressor->first_offset_for_byte = NULL;
pCompressor->next_offset_for_pos = NULL;
pCompressor->min_match_size = nMinMatchSize;
if (pCompressor->min_match_size < nMinMatchSizeForFormat)
pCompressor->min_match_size = nMinMatchSizeForFormat;
@ -89,7 +92,7 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->open_intervals = (unsigned int *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned int));
if (pCompressor->open_intervals) {
pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << MATCHES_PER_ARRIVAL_SHIFT) * sizeof(lzsa_arrival));
pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << ARRIVALS_PER_POSITION_SHIFT) * sizeof(lzsa_arrival));
if (pCompressor->arrival) {
pCompressor->best_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match));
@ -102,8 +105,23 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->match = (lzsa_match *)malloc(BLOCK_SIZE * NMATCHES_PER_INDEX_V2 * sizeof(lzsa_match));
else
pCompressor->match = (lzsa_match *)malloc(BLOCK_SIZE * NMATCHES_PER_INDEX_V1 * sizeof(lzsa_match));
if (pCompressor->match)
return 0;
if (pCompressor->match) {
if (pCompressor->format_version == 2) {
pCompressor->rep_handled_mask = (char*)malloc(NARRIVALS_PER_POSITION_V2_BIG * ((LCP_MAX + 1) / 8) * sizeof(char));
if (pCompressor->rep_handled_mask) {
pCompressor->first_offset_for_byte = (int*)malloc(65536 * sizeof(int));
if (pCompressor->first_offset_for_byte) {
pCompressor->next_offset_for_pos = (int*)malloc(BLOCK_SIZE * sizeof(int));
if (pCompressor->next_offset_for_pos) {
return 0;
}
}
}
}
else {
return 0;
}
}
}
}
}
@ -124,6 +142,21 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
divsufsort_destroy(&pCompressor->divsufsort_context);
if (pCompressor->next_offset_for_pos) {
free(pCompressor->next_offset_for_pos);
pCompressor->next_offset_for_pos = NULL;
}
if (pCompressor->first_offset_for_byte) {
free(pCompressor->first_offset_for_byte);
pCompressor->first_offset_for_byte = NULL;
}
if (pCompressor->rep_handled_mask) {
free(pCompressor->rep_handled_mask);
pCompressor->rep_handled_mask = NULL;
}
if (pCompressor->match) {
free(pCompressor->match);
pCompressor->match = NULL;

View File

@ -49,10 +49,10 @@ extern "C" {
#define VISITED_FLAG 0x80000000
#define EXCL_VISITED_MASK 0x7fffffff
#define NMATCHES_PER_ARRIVAL_V1 8
#define NMATCHES_PER_ARRIVAL_V2_SMALL 9
#define NMATCHES_PER_ARRIVAL_V2_BIG 32
#define MATCHES_PER_ARRIVAL_SHIFT 5
#define NARRIVALS_PER_POSITION_V1 8
#define NARRIVALS_PER_POSITION_V2_SMALL 9
#define NARRIVALS_PER_POSITION_V2_BIG 32
#define ARRIVALS_PER_POSITION_SHIFT 5
#define NMATCHES_PER_INDEX_V1 8
#define MATCHES_PER_INDEX_SHIFT_V1 3
@ -63,8 +63,6 @@ extern "C" {
#define LEAVE_ALONE_MATCH_SIZE 300
#define LEAVE_ALONE_MATCH_SIZE_SMALL 1000
#define LAST_LITERALS 0
#define MODESWITCH_PENALTY 3
/** One match */
@ -81,12 +79,10 @@ typedef struct {
int from_pos;
unsigned short rep_len;
unsigned short match_len;
int rep_pos;
int num_literals;
int score;
unsigned short match_offset;
unsigned short match_len;
} lzsa_arrival;
/** Compression statistics */
@ -128,6 +124,9 @@ typedef struct _lzsa_compressor {
lzsa_match *best_match;
lzsa_match *improved_match;
lzsa_arrival *arrival;
char *rep_handled_mask;
int *first_offset_for_byte;
int *next_offset_for_pos;
int min_match_size;
int format_version;
int flags;