Compare commits

...

64 Commits

Author SHA1 Message Date
Emmanuel Marty
15ee2dfe11
Bump version 2023-02-27 18:23:00 +01:00
Emmanuel Marty
35ec6d73da
Optimal LZSA1 compression 2023-02-27 08:26:42 +01:00
Emmanuel Marty
6b08bc3990
Update README 2023-02-13 10:37:25 +01:00
Emmanuel Marty
9350d977bf
Add consts 2023-02-10 17:08:03 +01:00
Emmanuel Marty
82f03b55e3
Faster LZSA1 compression 2023-02-02 11:11:14 +01:00
Emmanuel Marty
583e4db62e
Small improvements 2023-01-30 13:21:43 +01:00
Emmanuel Marty
398885a52d
Small simplifications in matchfinder 2023-01-30 13:19:03 +01:00
Emmanuel Marty
21a0dc70c8
Fix CppCheck warnings 2023-01-30 13:17:30 +01:00
Emmanuel Marty
8cea101625
Bump version 2022-10-21 13:59:48 +02:00
Emmanuel Marty
b86ccf8f7b
Small LZSA1 improvements; remove unneeded tests 2022-10-20 17:16:34 +02:00
Emmanuel Marty
185ea0cbf2
Compress LZSA1 another 25% faster; minor cleanup 2022-10-19 10:39:40 +02:00
Emmanuel Marty
ed81dd69df
Fix C99 warning 2022-10-18 07:56:51 +02:00
Emmanuel Marty
bea90736d5
Avoid forward declarations 2022-10-17 18:37:06 +02:00
Emmanuel Marty
3eaf926c1a
Fix some documentation comments 2022-10-17 09:43:56 +02:00
Emmanuel Marty
1bca5b995a
Add documentation comments 2022-10-17 09:42:39 +02:00
Emmanuel Marty
5484395465
Add more missing constants; more minor cleanup 2022-10-16 18:39:24 +02:00
Emmanuel Marty
34ed06abfb
Add missing consts; remove unneeded code; clean up 2022-10-15 12:10:41 +02:00
Emmanuel Marty
930383a18a
Bump version 2022-05-04 15:03:32 +02:00
Emmanuel Marty
eeec526eeb
Compress LZSA1 another 35% faster 2022-05-04 11:32:21 +02:00
Emmanuel Marty
613f3ef0d7
Small cleanup 2022-04-28 08:02:27 +02:00
Emmanuel Marty
38bfea7ecf
Small LZSA2 improvement 2022-04-20 14:06:19 +02:00
Emmanuel Marty
a5f3691d4f
Small cleanup; compress LZSA2 another 1% faster 2022-04-19 09:18:09 +02:00
Emmanuel Marty
55101e8ac7
Merge pull request #67 from emmanuel-marty/spke/lzsa2_z80_fast
New fast decompressor for LZSA2 (-6 bytes, +1% speed)
2022-04-08 16:18:39 +02:00
specke
b1a87b55da New fast decompressor for LZSA2 (-6 bytes, +1% speed) 2022-04-08 13:34:56 +01:00
Emmanuel Marty
120bd71ed9
Bump version 2022-04-06 10:33:31 +02:00
Emmanuel Marty
86b7fe0738
Merge pull request #66 from emmanuel-marty/spke/new_z80_small
New small decompressor for LZSA2 (-5 bytes, +1% speed)
2022-04-05 18:22:18 +02:00
specke
efc19e9d93 New small decompressor for LZSA2 (-5 bytes, +1% speed) 2022-04-05 11:41:52 +01:00
Emmanuel Marty
ae942d4eec
Small LZSA2 speedup in forward arrivals parser 2022-04-05 07:13:03 +02:00
Emmanuel Marty
81fd3af0b3
Small improvement 2022-04-04 13:58:00 +02:00
Emmanuel Marty
34de880080
Compress LZSA2 a bit faster again; small cleanup 2022-04-03 20:23:55 +02:00
Emmanuel Marty
9e11c0893a
Pack LZSA1 35% faster, and LZSA2 another 5% faster 2022-04-02 08:49:26 +02:00
Emmanuel Marty
acfa11d733
Bump version 2022-01-02 07:52:10 +01:00
Emmanuel Marty
6b3dff18a5
Compress ~5% faster; small ratio increase 2022-01-02 07:15:23 +01:00
Emmanuel Marty
9a8a04f9b2
Bump version 2021-11-30 10:54:48 +01:00
Emmanuel Marty
79fadb350e
Tiny token reduction, ratio increase for some data 2021-11-28 10:01:11 +01:00
Emmanuel Marty
61fb2b881e
Tiny LZSA1 ratio increase 2021-11-26 15:41:50 +01:00
Emmanuel Marty
6389895e41
Free, tiny LZSA2 ratio increase for some files 2021-11-26 11:24:44 +01:00
Emmanuel Marty
a48db51134
Merge pull request #64 from jbrandwood/master
Fix reorganized 6502 decompress_faster. Approx 3-4% faster, LZSA2 add…
2021-11-23 12:16:28 +01:00
John Brandwood
4b046625e6 Fix reorganized 6502 decompress_faster. Approx 3-4% faster, LZSA2 adds 1 byte. 2021-11-22 17:02:37 -05:00
Emmanuel Marty
978c44eca7
Revert 2021-11-21 18:03:03 +01:00
Emmanuel Marty
bed1006a6d
Merge pull request #63 from jbrandwood/master
Make 6502 decompress_faster_v2.asm (& v1) 3-4% faster at the cost of 1 byte extra.
2021-11-21 12:14:22 +01:00
John Brandwood
7610a965a5 Make 6502 decompress_faster_v2.asm (& v1) 3-4% faster at the cost of 1 byte. 2021-11-20 15:10:56 -05:00
Emmanuel Marty
4e59375048
Bump version 2021-10-13 11:49:49 +02:00
Emmanuel Marty
854c03bd53
Another small LZSA2 speedup 2021-10-12 21:02:16 +02:00
Emmanuel Marty
6aee0031ed
Increase LZSA2 ratio a little further 2021-10-12 15:52:11 +02:00
Emmanuel Marty
bb1b4fda14
Compress LZSA2 a bit faster again 2021-10-10 07:52:03 +02:00
Emmanuel Marty
c6a93601cf
Compress a little faster again 2021-10-05 12:11:39 +02:00
Emmanuel Marty
f665e8307d
Merge pull request #62 from Mistranger/master
Allow piping files to stdin and stdout.
2021-09-10 12:42:19 +02:00
cybermind
c1a2e9a82c Allow piping files to stdin and stdout 2021-07-26 22:06:54 +05:00
Emmanuel Marty
c0259a77b4
Bump version 2021-06-06 11:40:58 +02:00
Emmanuel Marty
5e404e93d1
Small LZSA2 ratio increase for some files 2021-06-04 19:11:22 +02:00
Emmanuel Marty
65d6972f2c
Add link to streamed LZSA2 depacker for 8088 2021-04-30 14:48:23 +02:00
Emmanuel Marty
bbf782ced8
Update README 2021-04-30 14:42:43 +02:00
Emmanuel Marty
5cfec00d87
Add Apple II+/IIe demo link to README 2021-04-15 13:21:26 +02:00
Emmanuel Marty
48f64a1d20
Merge pull request #60 from specke/master
5% faster decompressor for LZSA1 on Z80
2021-04-08 09:17:17 +02:00
introspec
e9a85e92dc
Minor edit
Make returned values more predictable
2021-04-07 19:54:23 +01:00
introspec
004db30296
Improved "fast" Z80 decompressor for LZSA1
113(+4) bytes, +5% speed
2021-04-07 19:50:12 +01:00
introspec
4eae728e56
Merge pull request #7 from emmanuel-marty/master
Synchronize with the head repository
2021-04-07 19:47:08 +01:00
Emmanuel Marty
42aad36b4d
Merge pull request #59 from jbrandwood/master
Reorganize 6502 decompress_faster depackers for smaller size and grea…
2021-03-26 14:22:42 +01:00
John Brandwood
03fd8751b0 Reorganize 6502 decompress_faster depackers for smaller size and greater speed. 2021-03-25 19:01:35 -04:00
Emmanuel Marty
95a189b1ed
Merge pull request #58 from specke/master
Added a good game that uses LZSA
2021-03-24 13:47:26 +01:00
introspec
be29323516
Update README.md
Added Marsmare: Alienation
2021-03-24 10:35:19 +00:00
introspec
9cd6c554c5
Merge pull request #6 from emmanuel-marty/master
Synchronize with the head
2021-03-24 10:30:25 +00:00
Emmanuel Marty
8075b5ab68
Fix #54 (LZSA2 spec typo) reported by remy-luisant 2021-01-13 11:30:17 +01:00
31 changed files with 1303 additions and 1525 deletions

View File

@ -58,7 +58,7 @@ Note that the match offset is negative: it is added to the current decompressed
If the encoded match length is 7 or more, the 'M' bits in the token form the value 7, and an extra nibble is read:
* 0-14: the value is added to the 3 stored in the token, and then the minmatch of 2 is added, to compose the final match length.
* 0-14: the value is added to the 7 stored in the token, and then the minmatch of 2 is added, to compose the final match length.
* 15: an extra byte follows
If an extra byte follows here, it can have two possible types of value:

View File

@ -17,6 +17,12 @@ The popular [rasm](https://github.com/EdouardBERGE/rasm) assembler for Z80 featu
The [desolate](https://github.com/nzeemin/spectrum-desolate) game port to the ZX Spectrum uses LZSA compression on Z80.
[Marsmare: Alienation](https://zxonline.net/game/marsmare-alienation/), the winner of the recent [Yandex Retro Games Battle 2020](https://yandex.ru/museum/yrgb-2020-en), is using LZSA to compress its assets.
The [Lowtech demo](https://github.com/wiz21b/lowtech) for the Apple II+ and IIe, by Wiz/Imphobia, compresses data with LZSA.
The [Druid & Droid](https://leosoft.itch.io/druid-and-droid) game for the Amstrad CPC also uses LZSA for compression.
The LZSA compression tool uses an aggressive optimal packing strategy to try to find the sequence of commands that gives the smallest packed file that decompresses to the original while maintaining the maximum possible decompression speed.
The compression formats give the user choices that range from decompressing faster than LZ4 on 8-bit systems with better compression, to compressing as well as ZX7 with much better decompression speed. LZSA1 is designed to replace LZ4 and LZSA2 to replace ZX7, in 8-bit scenarios.
@ -84,10 +90,10 @@ License:
External links:
* [i8080 decompressors](https://gitlab.com/ivagor/lzsa8080/tree/master) by Ivan Gorodetsky
* [PDP-11 decompressors](https://gitlab.com/ivagor/lzsa8080/tree/master/PDP11) also by Ivan Gorodetsky
* [i8080 and PDP-11 decompressors](https://github.com/ivagorRetrocomp/DeLZSA) by Ivan Gorodetsky
* [MC68000 decompressors](https://github.com/tattlemuss/lz4-m68k/blob/master/src/lzsa.s) by Steven Tattersall
* [Gameboy decompressors](https://github.com/meltycode) by Meltycode, based on the Z80 code by introspec
* [Streamed LZSA2 depacker](https://hg.ulukai.org/ecm/inicomp/file/c1a1f9bd4382/lzsa2.asm) by C. Masloch
* LZSA's page on [Pouet](https://www.pouet.net/prod.php?which=81573)
# Compressed format

View File

@ -7,12 +7,9 @@
;
; This code is written for the ACME assembler.
;
; Optional code is presented for one minor 6502 optimization that breaks
; compatibility with the current LZSA1 format standard.
; The code is 165 bytes for the small version, and 191 bytes for the normal.
;
; The code is 168 bytes for the small version, and 205 bytes for the normal.
;
; Copyright John Brandwood 2019.
; Copyright John Brandwood 2021.
;
; Distributed under the Boost Software License, Version 1.0.
; (See accompanying file LICENSE_1_0.txt or copy at
@ -30,103 +27,33 @@
;
;
; Choose size over space (within sane limits)?
; Choose size over decompression speed (within sane limits)?
;
LZSA_SMALL_SIZE = 0
;
; Remove code inlining to save space?
;
; This saves 15 bytes of code at the cost of 7% speed.
;
!if LZSA_SMALL_SIZE {
LZSA_NO_INLINE = 1
} else {
LZSA_NO_INLINE = 0
}
;
; Use smaller code for copying literals?
;
; This saves 11 bytes of code at the cost of 15% speed.
;
!if LZSA_SMALL_SIZE {
LZSA_SHORT_CP = 1
} else {
LZSA_SHORT_CP = 0
}
;
; Use smaller code for copying literals?
;
; This saves 11 bytes of code at the cost of 30% speed.
;
!if LZSA_SMALL_SIZE {
LZSA_SHORT_LZ = 1
} else {
LZSA_SHORT_LZ = 0
}
;
; Macro to increment the source pointer to the next page.
;
; This should call a subroutine to determine if a bank
; has been crossed, and a new bank should be paged in.
;
!macro LZSA_INC_PAGE {
inc <lzsa_srcptr + 1
}
;
; Macro to read a byte from the compressed source data.
;
!if LZSA_NO_INLINE {
!macro LZSA_GET_SRC {
jsr lzsa1_get_byte
}
} else {
!macro LZSA_GET_SRC {
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
bne .skip
+LZSA_INC_PAGE
.skip:
}
}
; ***************************************************************************
; ***************************************************************************
;
; Data usage is last 8 bytes of zero-page.
; Data usage is last 7 bytes of zero-page.
;
!if (LZSA_SHORT_CP | LZSA_SHORT_LZ) {
lzsa_length = $F8 ; 1 byte.
}
lzsa_cmdbuf = $F9 ; 1 byte.
lzsa_winptr = $FA ; 1 word.
lzsa_srcptr = $FC ; 1 word.
lzsa_dstptr = $FE ; 1 word.
lzsa_offset = lzsa_winptr
LZSA_SRC_LO = $FC
LZSA_SRC_HI = $FD
LZSA_DST_LO = $FE
LZSA_DST_HI = $FF
; ***************************************************************************
; ***************************************************************************
;
@ -134,7 +61,6 @@ LZSA_DST_HI = $FF
;
; Args: lzsa_srcptr = ptr to compressed data
; Args: lzsa_dstptr = ptr to output buffer
; Uses: lots!
;
DECOMPRESS_LZSA1_FAST:
@ -147,171 +73,175 @@ lzsa1_unpack: ldy #0 ; Initialize source index.
; N.B. X=0 is expected and guaranteed when we get here.
;
.cp_length: +LZSA_GET_SRC
sta <lzsa_cmdbuf ; Preserve this for later.
.cp_length: !if LZSA_SMALL_SIZE {
jsr .get_byte
} else {
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
bne .cp_skip0
inc <lzsa_srcptr + 1
}
.cp_skip0: sta <lzsa_cmdbuf ; Preserve this for later.
and #$70 ; Extract literal length.
lsr ; Set CC before ...
beq .lz_offset ; Skip directly to match?
lsr ; Get 3-bit literal length.
lsr
lsr
lsr
cmp #$07 ; Extended length?
bne .got_cp_len
bcc .cp_got_len
jsr .get_length ; CS from CMP, X=0.
jsr .get_length ; X=0, CS from CMP, returns CC.
stx .cp_npages + 1 ; Hi-byte of length.
!if LZSA_SHORT_CP {
.cp_got_len: tax ; Lo-byte of length.
.got_cp_len: cmp #0 ; Check the lo-byte of length.
beq .put_cp_len
inx ; Increment # of pages to copy.
.put_cp_len: stx <lzsa_length
tax
.cp_page: lda (lzsa_srcptr),y
sta (lzsa_dstptr),y
.cp_byte: lda (lzsa_srcptr),y ; CC throughout the execution of
sta (lzsa_dstptr),y ; this .cp_page loop.
inc <lzsa_srcptr + 0
bne .skip1
bne .cp_skip1
inc <lzsa_srcptr + 1
.skip1: inc <lzsa_dstptr + 0
bne .skip2
.cp_skip1: inc <lzsa_dstptr + 0
bne .cp_skip2
inc <lzsa_dstptr + 1
.skip2: dex
bne .cp_page
dec <lzsa_length ; Any full pages left to copy?
bne .cp_page
.cp_skip2: dex
bne .cp_byte
.cp_npages: lda #0 ; Any full pages left to copy?
beq .lz_offset
} else {
dec .cp_npages + 1 ; Unlikely, so can be slow.
bcc .cp_byte ; Always true!
.got_cp_len: tay ; Check the lo-byte of length.
beq .cp_page
inx ; Increment # of pages to copy.
.get_cp_src: clc ; Calc address of partial page.
adc <lzsa_srcptr + 0
sta <lzsa_srcptr + 0
bcs .get_cp_dst
dec <lzsa_srcptr + 1
.get_cp_dst: tya
clc ; Calc address of partial page.
adc <lzsa_dstptr + 0
sta <lzsa_dstptr + 0
bcs .get_cp_idx
dec <lzsa_dstptr + 1
.get_cp_idx: tya ; Negate the lo-byte of length.
eor #$FF
tay
iny
.cp_page: lda (lzsa_srcptr),y
sta (lzsa_dstptr),y
iny
bne .cp_page
inc <lzsa_srcptr + 1
inc <lzsa_dstptr + 1
dex ; Any full pages left to copy?
bne .cp_page
}
!if LZSA_SMALL_SIZE {
;
; Copy bytes from decompressed window.
;
; Shorter but slower version.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
.lz_offset: +LZSA_GET_SRC
clc
adc <lzsa_dstptr + 0
.lz_offset: jsr .get_byte ; Get offset-lo.
.offset_lo: adc <lzsa_dstptr + 0 ; Always CC from .cp_page loop.
sta <lzsa_winptr + 0
lda #$FF
bit <lzsa_cmdbuf
bpl .hi_offset
+LZSA_GET_SRC
bpl .offset_hi
.hi_offset: adc <lzsa_dstptr + 1
sta <lzsa_winptr + 1
jsr .get_byte ; Get offset-hi.
.offset_hi: adc <lzsa_dstptr + 1 ; lzsa_winptr < lzsa_dstptr, so
sta <lzsa_winptr + 1 ; always leaves CS.
.lz_length: lda <lzsa_cmdbuf ; X=0 from previous loop.
and #$0F
adc #$03 - 1 ; CS from previous ADC.
cmp #$12 ; Extended length?
bne .got_lz_len
bcc .lz_got_len
jsr .get_length ; CS from CMP, X=0.
jsr .get_length ; CS from CMP, X=0, returns CC.
stx .lz_npages + 1 ; Hi-byte of length.
!if LZSA_SHORT_LZ {
.lz_got_len: tax ; Lo-byte of length.
.got_lz_len: cmp #0 ; Check the lo-byte of length.
beq .put_lz_len
inx ; Increment # of pages to copy.
.put_lz_len: stx <lzsa_length
tax
.lz_page: lda (lzsa_winptr),y
sta (lzsa_dstptr),y
.lz_byte: lda (lzsa_winptr),y ; CC throughout the execution of
sta (lzsa_dstptr),y ; this .lz_page loop.
inc <lzsa_winptr + 0
bne .skip3
bne .lz_skip1
inc <lzsa_winptr + 1
.skip3: inc <lzsa_dstptr + 0
bne .skip4
.lz_skip1: inc <lzsa_dstptr + 0
bne .lz_skip2
inc <lzsa_dstptr + 1
.skip4: dex
bne .lz_page
dec <lzsa_length ; Any full pages left to copy?
bne .lz_page
.lz_skip2: dex
bne .lz_byte
.lz_npages: lda #0 ; Any full pages left to copy?
beq .cp_length
jmp .cp_length ; Loop around to the beginning.
dec .lz_npages + 1 ; Unlikely, so can be slow.
bcc .lz_byte ; Always true!
} else {
.got_lz_len: tay ; Check the lo-byte of length.
beq .lz_page
;
; Copy bytes from decompressed window.
;
; Longer but faster.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
inx ; Increment # of pages to copy.
.lz_offset: lda (lzsa_srcptr),y ; Get offset-lo.
inc <lzsa_srcptr + 0
bne .offset_lo
inc <lzsa_srcptr + 1
.get_lz_win: clc ; Calc address of partial page.
adc <lzsa_winptr + 0
sta <lzsa_winptr + 0
bcs .get_lz_dst
dec <lzsa_winptr + 1
.offset_lo: sta <lzsa_offset + 0
.get_lz_dst: tya
clc ; Calc address of partial page.
adc <lzsa_dstptr + 0
sta <lzsa_dstptr + 0
bcs .get_lz_idx
lda #$FF ; Get offset-hi.
bit <lzsa_cmdbuf
bpl .offset_hi
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
bne .offset_hi
inc <lzsa_srcptr + 1
.offset_hi: sta <lzsa_offset + 1
.lz_length: lda <lzsa_cmdbuf ; X=0 from previous loop.
and #$0F
adc #$03 ; Always CC from .cp_page loop.
cmp #$12 ; Extended length?
bcc .got_lz_len
jsr .get_length ; X=0, CS from CMP, returns CC.
.got_lz_len: inx ; Hi-byte of length+256.
eor #$FF ; Negate the lo-byte of length
tay
eor #$FF
.get_lz_dst: adc <lzsa_dstptr + 0 ; Calc address of partial page.
sta <lzsa_dstptr + 0 ; Always CC from previous CMP.
iny
bcs .get_lz_win
beq .get_lz_win ; Is lo-byte of length zero?
dec <lzsa_dstptr + 1
.get_lz_idx: tya ; Negate the lo-byte of length.
eor #$FF
tay
iny
.get_lz_win: clc ; Calc address of match.
adc <lzsa_offset + 0 ; N.B. Offset is negative!
sta <lzsa_winptr + 0
lda <lzsa_dstptr + 1
adc <lzsa_offset + 1
sta <lzsa_winptr + 1
.lz_page: lda (lzsa_winptr),y
.lz_byte: lda (lzsa_winptr),y
sta (lzsa_dstptr),y
iny
bne .lz_page
inc <lzsa_winptr + 1
bne .lz_byte
inc <lzsa_dstptr + 1
dex ; Any full pages left to copy?
bne .lz_page
bne .lz_more
jmp .cp_length ; Loop around to the beginning.
.lz_more: inc <lzsa_winptr + 1 ; Unlikely, so can be slow.
bne .lz_byte ; Always true!
}
;
; Get 16-bit length in X:A register pair.
; Get 16-bit length in X:A register pair, return with CC.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
@ -320,34 +250,33 @@ lzsa1_unpack: ldy #0 ; Initialize source index.
adc (lzsa_srcptr),y ; the length.
inc <lzsa_srcptr + 0
bne .skip_inc
+LZSA_INC_PAGE
inc <lzsa_srcptr + 1
.skip_inc: bcc .got_length ; No overflow means done.
cmp #$00 ; Overflow to 256 or 257?
clc ; MUST return CC!
tax ; Preserve overflow value.
.extra_byte: jsr .get_byte ; So rare, this can be slow!
pha
txa ; Overflow to 256 or 257?
beq .extra_word
.extra_byte: inx
jmp lzsa1_get_byte ; So rare, this can be slow!
.extra_word: jsr lzsa1_get_byte ; So rare, this can be slow!
pha
jsr lzsa1_get_byte ; So rare, this can be slow!
tax
beq .finished ; Length-hi == 0 at EOF.
pla ; Length-lo.
rts
lzsa1_get_byte:
lda (lzsa_srcptr),y ; Subroutine version for when
inc <lzsa_srcptr + 0 ; inlining isn't advantageous.
beq lzsa1_next_page
.check_length: pla ; Length-lo.
bne .got_length ; Check for zero.
dex ; Do one less page loop if so.
.got_length: rts
lzsa1_next_page:
inc <lzsa_srcptr + 1 ; Inc & test for bank overflow.
rts
.extra_word: jsr .get_byte ; So rare, this can be slow!
tax
bne .check_length ; Length-hi == 0 at EOF.
.finished: pla ; Length-lo.
pla ; Decompression completed, pop
pla ; return address.
rts
.get_byte: lda (lzsa_srcptr),y ; Subroutine version for when
inc <lzsa_srcptr + 0 ; inlining isn't advantageous.
bne .got_byte
inc <lzsa_srcptr + 1 ; Inc & test for bank overflow.
.got_byte: rts

View File

@ -7,12 +7,9 @@
;
; This code is written for the ACME assembler.
;
; Optional code is presented for two minor 6502 optimizations that break
; compatibility with the current LZSA2 format standard.
; The code is 241 bytes for the small version, and 256 bytes for the normal.
;
; The code is 241 bytes for the small version, and 267 bytes for the normal.
;
; Copyright John Brandwood 2019.
; Copyright John Brandwood 2021.
;
; Distributed under the Boost Software License, Version 1.0.
; (See accompanying file LICENSE_1_0.txt or copy at
@ -30,99 +27,11 @@
;
;
; Choose size over space (within sane limits)?
; Choose size over decompression speed (within sane limits)?
;
LZSA_SMALL_SIZE = 0
;
; Remove code inlining to save space?
;
; This saves 15 bytes of code at the cost of 7% speed.
;
!if LZSA_SMALL_SIZE {
LZSA_NO_INLINE = 1
} else {
LZSA_NO_INLINE = 0
}
;
; Use smaller code for copying literals?
;
; This saves 11 bytes of code at the cost of 5% speed.
;
!if LZSA_SMALL_SIZE {
LZSA_SHORT_CP = 1
} else {
LZSA_SHORT_CP = 0
}
;
; We will read from or write to $FFFF. This prevents the
; use of the "INC ptrhi / BNE" trick and reduces speed.
;
LZSA_USE_FFFF = 0
;
; Macro to increment the source pointer to the next page.
;
!macro LZSA_INC_PAGE {
inc <lzsa_srcptr + 1
}
;
; Macro to read a byte from the compressed source data.
;
!if LZSA_NO_INLINE {
!macro LZSA_GET_SRC {
jsr lzsa2_get_byte
}
} else {
!macro LZSA_GET_SRC {
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
bne .skip
+LZSA_INC_PAGE
.skip:
}
}
;
; Macro to speed up reading 50% of nibbles.
;
; This seems to save very few cycles compared to the
; increase in code size, and it isn't recommended.
;
LZSA_SLOW_NIBL = 1
!if (LZSA_SLOW_NIBL + LZSA_SMALL_SIZE) {
!macro LZSA_GET_NIBL {
jsr lzsa2_get_nibble ; Always call a function.
}
} else {
!macro LZSA_GET_NIBL {
lsr <lzsa_nibflg ; Is there a nibble waiting?
lda <lzsa_nibble ; Extract the lo-nibble.
bcs .skip
jsr lzsa2_new_nibble ; Extract the hi-nibble.
.skip: ora #$F0
}
}
; ***************************************************************************
@ -131,6 +40,8 @@ LZSA_SLOW_NIBL = 1
; Data usage is last 11 bytes of zero-page.
;
lzsa_length = lzsa_winptr ; 1 word.
lzsa_cmdbuf = $F5 ; 1 byte.
lzsa_nibflg = $F6 ; 1 byte.
lzsa_nibble = $F7 ; 1 byte.
@ -155,53 +66,33 @@ LZSA_DST_HI = $FF
;
; Args: lzsa_srcptr = ptr to compressed data
; Args: lzsa_dstptr = ptr to output buffer
; Uses: lots!
;
DECOMPRESS_LZSA2_FAST:
lzsa2_unpack: ldy #0 ; Initialize source index.
lzsa2_unpack: ldx #$00 ; Hi-byte of length or offset.
ldy #$00 ; Initialize source index.
sty <lzsa_nibflg ; Initialize nibble buffer.
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
beq .cp_length ; always taken
.incsrc1:
inc <lzsa_srcptr + 1
bne .resume_src1 ; always taken
!if LZSA_SHORT_CP {
.incsrc2:
inc <lzsa_srcptr + 1
bne .resume_src2 ; always taken
.incdst:
inc <lzsa_dstptr + 1
bne .resume_dst ; always taken
}
}
;
; Copy bytes from compressed source data.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
.cp_length: ldx #$00 ; Hi-byte of length or offset.
.cp_length: !if LZSA_SMALL_SIZE {
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
+LZSA_GET_SRC
jsr .get_byte
} else {
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
beq .incsrc1
bne .cp_skip0
inc <lzsa_srcptr + 1
}
.resume_src1:
sta <lzsa_cmdbuf ; Preserve this for later.
.cp_skip0: sta <lzsa_cmdbuf ; Preserve this for later.
and #$18 ; Extract literal length.
beq .lz_offset ; Skip directly to match?
@ -209,245 +100,177 @@ lzsa2_unpack: ldy #0 ; Initialize source index.
lsr
lsr
cmp #$03 ; Extended length?
bne .got_cp_len
bcc .cp_got_len
jsr .get_length ; X=0 table index for literals.
jsr .get_length ; X=0 for literals, returns CC.
stx .cp_npages + 1 ; Hi-byte of length.
!if LZSA_SHORT_CP {
.cp_got_len: tax ; Lo-byte of length.
.got_cp_len: cmp #0 ; Check the lo-byte of length.
beq .put_cp_len
inx ; Increment # of pages to copy.
.put_cp_len: stx <lzsa_length
tax
.cp_page: lda (lzsa_srcptr),y
sta (lzsa_dstptr),y
.cp_byte: lda (lzsa_srcptr),y ; CC throughout the execution of
sta (lzsa_dstptr),y ; this .cp_page loop.
inc <lzsa_srcptr + 0
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
bne .skip1
bne .cp_skip1
inc <lzsa_srcptr + 1
.skip1: inc <lzsa_dstptr + 0
bne .skip2
.cp_skip1: inc <lzsa_dstptr + 0
bne .cp_skip2
inc <lzsa_dstptr + 1
.skip2:
.cp_skip2: dex
bne .cp_byte
.cp_npages: lda #0 ; Any full pages left to copy?
beq .lz_offset
} else {
beq .incsrc2
.resume_src2:
inc <lzsa_dstptr + 0
beq .incdst
.resume_dst:
}
dex
bne .cp_page
dec <lzsa_length ; Any full pages left to copy?
bne .cp_page
} else {
.got_cp_len: tay ; Check the lo-byte of length.
beq .cp_page
inx ; Increment # of pages to copy.
.get_cp_src: clc ; Calc address of partial page.
adc <lzsa_srcptr + 0
sta <lzsa_srcptr + 0
bcs .get_cp_dst
dec <lzsa_srcptr + 1
.get_cp_dst: tya
clc ; Calc address of partial page.
adc <lzsa_dstptr + 0
sta <lzsa_dstptr + 0
bcs .get_cp_idx
dec <lzsa_dstptr + 1
.get_cp_idx: tya ; Negate the lo-byte of length.
eor #$FF
tay
iny
.cp_page: lda (lzsa_srcptr),y
sta (lzsa_dstptr),y
iny
bne .cp_page
inc <lzsa_srcptr + 1
inc <lzsa_dstptr + 1
dex ; Any full pages left to copy?
bne .cp_page
}
; ================================
; xyz
; 00z 5-bit offset
; 01z 9-bit offset
; 10z 13-bit offset
; 110 16-bit offset
; 111 repeat offset
.lz_offset: lda <lzsa_cmdbuf
asl
bcs .get_13_16_rep
asl
bcs .get_9_bits
.get_5_bits: dex ; X=$FF
.get_13_bits: asl
php
+LZSA_GET_NIBL ; Always returns with CS.
plp
rol ; Shift into position, set C.
eor #$01
cpx #$00 ; X=$FF for a 5-bit offset.
bne .set_offset
sbc #2 ; Subtract 512 because 13-bit
; offset starts at $FE00.
bne .get_low8x ; Always NZ from previous SBC.
.get_9_bits: dex ; X=$FF if CS, X=$FE if CC.
asl
bcc .get_low8
dex
bcs .get_low8 ; Always VS from previous BIT.
.get_13_16_rep: asl
bcc .get_13_bits ; Shares code with 5-bit path.
.get_16_rep: bmi .lz_length ; Repeat previous offset.
dec .cp_npages + 1 ; Unlikely, so can be slow.
bcc .cp_byte ; Always true!
;
; Copy bytes from decompressed window.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
; xyz
; ===========================
; 00z 5-bit offset
; 01z 9-bit offset
; 10z 13-bit offset
; 110 16-bit offset
; 111 repeat offset
;
.get_16_bits: jsr lzsa2_get_byte ; Get hi-byte of offset.
.lz_offset: lda <lzsa_cmdbuf
asl
bcs .get_13_16_rep
.get_low8x: tax
.get_5_9_bits: dex ; X=$FF for a 5-bit offset.
asl
bcs .get_9_bits ; Fall through if 5-bit.
.get_low8:
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
.get_13_bits: asl ; Both 5-bit and 13-bit read
php ; a nibble.
jsr .get_nibble
plp
rol ; Shift into position, clr C.
eor #$E1
cpx #$00 ; X=$FF for a 5-bit offset.
bne .set_offset
sbc #2 ; 13-bit offset from $FE00.
bne .set_hi_8 ; Always NZ from previous SBC.
+LZSA_GET_SRC ; Get lo-byte of offset.
.get_9_bits: asl ; X=$FF if CC, X=$FE if CS.
bcc .get_lo_8
dex
bcs .get_lo_8 ; Always CS from previous BCC.
.get_13_16_rep: asl
bcc .get_13_bits ; Shares code with 5-bit path.
.get_16_rep: bmi .lz_length ; Repeat previous offset.
.get_16_bits: jsr .get_byte ; Get hi-byte of offset.
.set_hi_8: tax
.get_lo_8: !if LZSA_SMALL_SIZE {
jsr .get_byte ; Get lo-byte of offset.
} else {
lda (lzsa_srcptr),y
lda (lzsa_srcptr),y ; Get lo-byte of offset.
inc <lzsa_srcptr + 0
beq .incsrc3
.resume_src3:
bne .set_offset
inc <lzsa_srcptr + 1
}
.set_offset: stx <lzsa_offset + 1 ; Save new offset.
sta <lzsa_offset + 0
.set_offset: sta <lzsa_offset + 0 ; Save new offset.
stx <lzsa_offset + 1
.lz_length: ldx #$00 ; Hi-byte of length.
.lz_length: ldx #1 ; Hi-byte of length+256.
lda <lzsa_cmdbuf
and #$07
clc
adc #$02
cmp #$09 ; Extended length?
bne .got_lz_len
bcc .got_lz_len
inx
jsr .get_length ; X=1 table index for match.
jsr .get_length ; X=1 for match, returns CC.
inx ; Hi-byte of length+256.
.got_lz_len: eor #$FF ; Negate the lo-byte of length
tay ; and check for zero.
iny
beq .calc_lz_addr
.got_lz_len: eor #$FF ; Negate the lo-byte of length.
tay
eor #$FF
inx ; Increment # of pages to copy.
clc ; Calc destination for partial
adc <lzsa_dstptr + 0 ; page.
sta <lzsa_dstptr + 0
bcs .calc_lz_addr
.get_lz_dst: adc <lzsa_dstptr + 0 ; Calc address of partial page.
sta <lzsa_dstptr + 0 ; Always CC from previous CMP.
iny
bcs .get_lz_win
beq .get_lz_win ; Is lo-byte of length zero?
dec <lzsa_dstptr + 1
.calc_lz_addr: clc ; Calc address of match.
lda <lzsa_dstptr + 0 ; N.B. Offset is negative!
adc <lzsa_offset + 0
.get_lz_win: clc ; Calc address of match.
adc <lzsa_offset + 0 ; N.B. Offset is negative!
sta <lzsa_winptr + 0
lda <lzsa_dstptr + 1
adc <lzsa_offset + 1
sta <lzsa_winptr + 1
.lz_page: lda (lzsa_winptr),y
.lz_byte: lda (lzsa_winptr),y
sta (lzsa_dstptr),y
iny
bne .lz_page
inc <lzsa_winptr + 1
bne .lz_byte
inc <lzsa_dstptr + 1
dex ; Any full pages left to copy?
bne .lz_page
bne .lz_more
jmp .cp_length ; Loop around to the beginning.
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
.incsrc3:
inc <lzsa_srcptr + 1
bne .resume_src3 ; always taken
}
.lz_more: inc <lzsa_winptr + 1 ; Unlikely, so can be slow.
bne .lz_byte ; Always true!
;
; Lookup tables to differentiate literal and match lengths.
;
.nibl_len_tbl: !byte 3 + $10 ; 0+3 (for literal).
!byte 9 + $10 ; 2+7 (for match).
.nibl_len_tbl: !byte 3 ; 0+3 (for literal).
!byte 9 ; 2+7 (for match).
.byte_len_tbl: !byte 18 - 1 ; 0+3+15 - CS (for literal).
!byte 24 - 1 ; 2+7+15 - CS (for match).
;
; Get 16-bit length in X:A register pair.
; Get 16-bit length in X:A register pair, return with CC.
;
.get_length: +LZSA_GET_NIBL
cmp #$FF ; Extended length?
.get_length: jsr .get_nibble
cmp #$0F ; Extended length?
bcs .byte_length
adc .nibl_len_tbl,x ; Always CC from previous CMP.
.got_length: ldx #$00 ; Set hi-byte of 4 & 8 bit
rts ; lengths.
.byte_length: jsr lzsa2_get_byte ; So rare, this can be slow!
.byte_length: jsr .get_byte ; So rare, this can be slow!
adc .byte_len_tbl,x ; Always CS from previous CMP.
bcc .got_length
beq .finished
.word_length: jsr lzsa2_get_byte ; So rare, this can be slow!
.word_length: clc ; MUST return CC!
jsr .get_byte ; So rare, this can be slow!
pha
jsr lzsa2_get_byte ; So rare, this can be slow!
jsr .get_byte ; So rare, this can be slow!
tax
pla
rts
bne .got_word ; Check for zero lo-byte.
dex ; Do one less page loop if so.
.got_word: rts
lzsa2_get_byte:
lda (lzsa_srcptr),y ; Subroutine version for when
.get_byte: lda (lzsa_srcptr),y ; Subroutine version for when
inc <lzsa_srcptr + 0 ; inlining isn't advantageous.
beq lzsa2_next_page
rts
lzsa2_next_page:
inc <lzsa_srcptr + 1 ; Inc & test for bank overflow.
rts
bne .got_byte
inc <lzsa_srcptr + 1
.got_byte: rts
.finished: pla ; Decompression completed, pop
pla ; return address.
@ -457,66 +280,29 @@ lzsa2_next_page:
; Get a nibble value from compressed data in A.
;
!if (LZSA_SLOW_NIBL | LZSA_SMALL_SIZE) {
lzsa2_get_nibble:
lsr <lzsa_nibflg ; Is there a nibble waiting?
.get_nibble: lsr <lzsa_nibflg ; Is there a nibble waiting?
lda <lzsa_nibble ; Extract the lo-nibble.
bcs .got_nibble
inc <lzsa_nibflg ; Reset the flag.
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
+LZSA_GET_SRC
!if LZSA_SMALL_SIZE {
jsr .get_byte
} else {
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
beq .incsrc4
.resume_src4:
}
sta <lzsa_nibble ; Preserve for next time.
lsr ; Extract the hi-nibble.
lsr
lsr
lsr
.got_nibble: ora #$F0
rts
} else {
lzsa2_new_nibble:
inc <lzsa_nibflg ; Reset the flag.
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
+LZSA_GET_SRC
} else {
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
beq .incsrc4
.resume_src4:
}
sta <lzsa_nibble ; Preserve for next time.
lsr ; Extract the hi-nibble.
lsr
lsr
lsr
rts
}
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
.incsrc4:
bne .set_nibble
inc <lzsa_srcptr + 1
bne .resume_src4 ; always taken
}
.set_nibble: sta <lzsa_nibble ; Preserve for next time.
lsr ; Extract the hi-nibble.
lsr
lsr
lsr
.got_nibble: and #$0F
rts

View File

@ -1,5 +1,5 @@
;
; Speed-optimized LZSA1 decompressor by spke & uniabis (109 bytes)
; Speed-optimized LZSA1 decompressor by spke & uniabis (113 bytes)
;
; ver.00 by spke for LZSA 0.5.4 (03-24/04/2019, 134 bytes);
; ver.01 by spke for LZSA 0.5.6 (25/04/2019, 110(-24) bytes, +0.2% speed);
@ -10,7 +10,8 @@
; ver.06 by spke for LZSA 1.0.7 (27/08/2019, 111(+4) bytes, +2.1% speed);
; ver.07 by spke for LZSA 1.1.0 (25/09/2019, added full revision history);
; ver.08 by spke for LZSA 1.1.2 (22/10/2019, re-organized macros and added an option for unrolled copying of long matches);
; ver.09 by spke for LZSA 1.2.1 (02/01/2020, 109(-2) bytes, same speed)
; ver.09 by spke for LZSA 1.2.1 (02/01/2020, 109(-2) bytes, same speed);
; ver.10 by spke (07/04/2021, 113(+4) bytes, +5% speed)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
@ -58,8 +59,8 @@
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; DEFINE UNROLL_LONG_MATCHES ; uncomment for faster decompression of very compressible data (+57 bytes)
; DEFINE BACKWARD_DECOMPRESS
; DEFINE UNROLL_LONG_MATCHES ; uncomment for faster decompression of very compressible data (+51 bytes)
; DEFINE BACKWARD_DECOMPRESS ; uncomment to decompress backward compressed data (-3% speed, +5 bytes)
IFNDEF BACKWARD_DECOMPRESS
@ -68,7 +69,8 @@
ENDM
MACRO ADD_OFFSET
ex de,hl : add hl,de
; HL = DE+HL
add hl,de
ENDM
MACRO COPY1
@ -86,8 +88,9 @@
ENDM
MACRO ADD_OFFSET
ex de,hl : ld a,e : sub l : ld l,a
ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes
; HL = DE-HL
ld a,e : sub l : ld l,a
ld a,d : sbc h : ld h,a ; 6*4 = 24t / 6 bytes
ENDM
MACRO COPY1
@ -103,19 +106,25 @@
@DecompressLZSA1:
ld b,0 : jr ReadToken
IFNDEF UNROLL_LONG_MATCHES
CopyMatch2: ld c,a
.UseC ex (sp),hl : jr CopyMatch.UseC
ENDIF
NoLiterals: xor (hl) : NEXT_HL : jp m,LongOffset
ShortOffset: push de : ld e,(hl) : ld d,#FF
ShortOffset: push hl : ld l,(hl) : ld h,#FF
; short matches have length 0+3..14+3
add 3 : cp 15+3 : jr nc,LongerMatch
; placed here this saves a JP per iteration
CopyMatch: ld c,a
.UseC NEXT_HL : ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src]
CopyMatch: ld c,a ; BC = len, DE = dest, HL = offset, SP -> [src]
.UseC ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src]
COPY1 : COPY1 : COPYBC ; BC = 0, DE = dest
.popSrc pop hl ; HL = src
.popSrc pop hl : NEXT_HL ; HL = src
ReadToken: ; first a byte token "O|LLL|MMMM" is read from the stream,
; where LLL is the number of literals and MMMM is
@ -132,32 +141,32 @@ ReadToken: ; first a byte token "O|LLL|MMMM" is read from the stream,
and #8F : jp p,ShortOffset
LongOffset: ; read second byte of the offset
push de : ld e,(hl) : NEXT_HL : ld d,(hl)
ld c,(hl) : NEXT_HL : push hl : ld h,(hl) : ld l,c
add -128+3 : cp 15+3 : jp c,CopyMatch
IFNDEF UNROLL_LONG_MATCHES
; MMMM=15 indicates a multi-byte match length
LongerMatch: NEXT_HL : add (hl) : jr nc,CopyMatch
LongerMatch: ex (sp),hl : NEXT_HL : add (hl) : jr nc,CopyMatch2
; the codes are designed to overflow;
; the overflow value 1 means read 1 extra byte
; and overflow value 0 means read 2 extra bytes
.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,CopyMatch.UseC
.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,CopyMatch2.UseC
.code0 NEXT_HL : ld b,(hl)
; the two-byte match length equal to zero
; designates the end-of-data marker
ld a,b : or c : jr nz,CopyMatch.UseC
pop de : ret
ld a,b : or c : jr nz,CopyMatch2.UseC
pop bc : ret
ELSE
; MMMM=15 indicates a multi-byte match length
LongerMatch: NEXT_HL : add (hl) : jr c,VeryLongMatch
LongerMatch: ex (sp),hl : NEXT_HL : add (hl) : jr c,VeryLongMatch
ld c,a
.UseC NEXT_HL : ex (sp),hl
.UseC ex (sp),hl
ADD_OFFSET
COPY1 : COPY1
@ -170,7 +179,7 @@ LongerMatch: NEXT_HL : add (hl) : jr c,VeryLongMatch
COPY1
EDUP
jp pe,.fastLDIR
jp CopyMatch.popSrc
jr CopyMatch.popSrc
VeryLongMatch: ; the codes are designed to overflow;
; the overflow value 1 means read 1 extra byte
@ -181,7 +190,7 @@ VeryLongMatch: ; the codes are designed to overflow;
; the two-byte match length equal to zero
; designates the end-of-data marker
ld a,b : or c : jr nz,LongerMatch.UseC
pop de : ret
pop bc : ret
ENDIF

View File

@ -1,5 +1,5 @@
;
; Speed-optimized LZSA2 decompressor by spke & uniabis (216 bytes)
; Speed-optimized LZSA2 decompressor by spke & uniabis (210 bytes)
;
; ver.00 by spke for LZSA 1.0.0 (02-07/06/2019, 218 bytes);
; ver.01 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
@ -8,7 +8,8 @@
; ver.04 by spke for LZSA 1.0.7 (01/08/2019, 214(+1) bytes, +0.2% speed and small re-organization of macros);
; ver.05 by spke (27/08/2019, 216(+2) bytes, +1.1% speed);
; ver.06 by spke for LZSA 1.1.0 (26/09/2019, added full revision history);
; ver.07 by spke for LZSA 1.1.1 (10/10/2019, +0.2% speed and an option for unrolled copying of long matches)
; ver.07 by spke for LZSA 1.1.1 (10/10/2019, +0.2% speed and an option for unrolled copying of long matches);
; ver.08 by spke (07-08/04/2022, 210(-6) bytes, +1.7% speed, using self-modifying code by default)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
@ -57,8 +58,7 @@
; 3. This notice may not be removed or altered from any source distribution.
; DEFINE UNROLL_LONG_MATCHES ; uncomment for faster decompression of very compressible data (+38 bytes)
; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b
; DEFINE HD64180 ; uncomment for systems using Hitachi HD64180
; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b (+5 bytes, -3.2% speed)
IFNDEF BACKWARD_DECOMPRESS
@ -67,7 +67,7 @@
ENDM
MACRO ADD_OFFSET
ex de,hl : add hl,de
add hl,de
ENDM
MACRO COPY1
@ -85,8 +85,9 @@
ENDM
MACRO ADD_OFFSET
ex de,hl : ld a,e : sub l : ld l,a
ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes
; HL = DE - HL
ld a,e : sub l : ld l,a
ld a,d : sbc h : ld h,a ; 6*4 = 24t / 6 bytes
ENDM
MACRO COPY1
@ -99,37 +100,15 @@
ENDIF
IFNDEF HD64180
MACRO LD_IX_DE
ld ixl,e : ld ixh,d
ENDM
MACRO LD_DE_IX
ld e,ixl : ld d,ixh
ENDM
ELSE
MACRO LD_IX_DE
push de : pop ix
ENDM
MACRO LD_DE_IX
push ix : pop de
ENDM
ENDIF
@DecompressLZSA2:
; A' stores next nibble as %1111.... or assumed to contain trash
; B is assumed to be 0
; B is assumed to be 0 in many places
ld b,0 : scf : exa : jr ReadToken
ManyLiterals: ld a,18 : add (hl) : NEXT_HL : jr nc,CopyLiterals
ManyLiterals: ld a,18 : add (hl) : NEXT_HL : jr nc,CopyMoreLiterals
ld c,(hl) : NEXT_HL
ld a,b : ld b,(hl)
jr ReadToken.NEXTHLuseBC
@ -140,23 +119,31 @@ ManyLiterals: ld a,18 : add (hl) : NEXT_HL : jr nc,CopyLiterals
MoreLiterals: ld b,(hl) : NEXT_HL
scf : exa : jr nc,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
; nibbles are read left-to-right; spare nibbles are kept in AF'
; and flag NC indicates that a nibble is available
ld a,(hl) : or a : exa
ld a,(hl) : NEXT_HL
rrca : rrca : rrca : rrca
.noUpdate ;sub #F0-3 : cp 15+3 : jr z,ManyLiterals
.noUpdate or #F0
;sub #F0-3 : cp 15+3 : jr z,ManyLiterals
inc a : jr z,ManyLiterals : sub #F0-3+1
CopyLiterals: ld c,a : ld a,b : ld b,0
COPYBC
push de : or a : jp p,CASE0xx ;: jr CASE1xx
CopyMoreLiterals: ld c,a : ld a,b : ld b,0
COPY1
COPY1
COPYBC
or a : jp p,CASE0xx
cp %11000000 : jr c,CASE10x
CASE11x cp %11100000 : jr c,CASE110
; "111": repeated offset
CASE111: LD_DE_IX : jr MatchLen
CASE11x cp %11100000 : jr nc,MatchLen
; "110": 16-bit offset
CASE110: ld b,(hl) : NEXT_HL : jr ReadOffsetC
@ -166,69 +153,66 @@ Literals0011: jr nz,MoreLiterals
; if "LL" of the byte token is equal to 0,
; there are no literals to copy
NoLiterals: or (hl) : NEXT_HL
push de : jp m,CASE1xx
jp m,CASE1xx
; short (5 or 9 bit long) offsets
CASE0xx ld d,#FF : cp %01000000 : jr c,CASE00x
CASE0xx cp %01000000 : jr c,CASE00x
; "01x": the case of the 9-bit offset
CASE01x: cp %01100000 : rl d
; "01x": the case of the 9-bit offset
CASE01x: dec b : cp %01100000 : rl b
ReadOffsetE ld e,(hl) : NEXT_HL
ReadOffsetC ld c,(hl) : NEXT_HL
SaveOffset: LD_IX_DE
SaveOffset ld (CopyMatch.PrevOffset),bc : ld b,0
MatchLen: inc a : and %00000111 : jr z,LongerMatch : inc a
MatchLen inc a : and %00000111 : jr z,LongerMatch : inc a
CopyMatch: ld c,a
.useC ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src]
COPY1
COPYBC
.popSrc pop hl
CopyMatch: ld c,a
.useC push hl
.PrevOffset EQU $+1 : ld hl,0
ADD_OFFSET
COPY1
COPYBC
.popSrc pop hl
; compressed data stream contains records
; each record begins with the byte token "XYZ|LL|MMM"
ReadToken: ld a,(hl) : and %00011000 : jp pe,Literals0011 ; process the cases 00 and 11 separately
rrca : rrca : rrca
rrca : rrca : rrca
ld c,a : ld a,(hl) ; token is re-read for further processing
.NEXTHLuseBC NEXT_HL
COPYBC
ld c,a : ld a,(hl) ; token is re-read for further processing
.NEXTHLuseBC NEXT_HL
COPYBC
; the token and literals are followed by the offset
push de : or a : jp p,CASE0xx
or a : jp p,CASE0xx
CASE1xx cp %11000000 : jr nc,CASE11x
; "10x": the case of the 13-bit offset
CASE10x: ld c,a : exa : jr nc,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
ld a,(hl) : or a : exa
ld a,(hl) : NEXT_HL
rrca : rrca : rrca : rrca
.noUpdate ld d,a : ld a,c
cp %10100000 : dec d : rl d : jr ReadOffsetE
.noUpdate or #F0 : ld b,a : ld a,c
cp %10100000 : dec b : rl b : jr ReadOffsetC
; "110": 16-bit offset
CASE110: ld d,(hl) : NEXT_HL : jr ReadOffsetE
; "00x": the case of the 5-bit offset
CASE00x: ld c,a : exa : jr nc,.noUpdate
CASE00x: ld b,a : exa : jr nc,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
ld a,(hl) : or a : exa
ld a,(hl) : NEXT_HL
rrca : rrca : rrca : rrca
.noUpdate ld e,a : ld a,c
cp %00100000 : rl e : jp SaveOffset
.noUpdate or #F0 : ld c,a : ld a,b
cp %00100000 : rl c
ld b,#FF : jr SaveOffset
@ -236,27 +220,27 @@ CASE00x: ld c,a : exa : jr nc,.noUpdate
LongerMatch: scf : exa : jr nc,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
ld a,(hl) : or a : exa
ld a,(hl) : NEXT_HL
rrca : rrca : rrca : rrca
.noUpdate sub #F0-9 : cp 15+9 : jr c,CopyMatch
.noUpdate or #F0 : sub #F0-9 : cp 15+9 : jr c,CopyMatch
IFNDEF UNROLL_LONG_MATCHES
LongMatch: add (hl) : NEXT_HL : jr nc,CopyMatch
ld c,(hl) : NEXT_HL
ld b,(hl) : NEXT_HL : jr nz,CopyMatch.useC
pop de : ret
ret
ELSE
LongMatch: add (hl) : NEXT_HL : jr c,VeryLongMatch
ld c,a
.useC ex (sp),hl
.useC push hl
ld hl,(CopyMatch.PrevOffset)
ADD_OFFSET
COPY1
; this is an unrolled equivalent of LDIR
xor a : sub c
@ -271,7 +255,7 @@ LongMatch: add (hl) : NEXT_HL : jr c,VeryLongMatch
VeryLongMatch: ld c,(hl) : NEXT_HL
ld b,(hl) : NEXT_HL : jr nz,LongMatch.useC
pop de : ret
ret
ENDIF

View File

@ -1,5 +1,5 @@
;
; Size-optimized LZSA2 decompressor by spke & uniabis (139 bytes)
; Size-optimized LZSA2 decompressor by spke & uniabis (134 bytes)
;
; ver.00 by spke for LZSA 1.0.0 (02-09/06/2019, 145 bytes);
; ver.01 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
@ -7,6 +7,8 @@
; ver.03 by spke for LZSA 1.0.7 (01/08/2019, 140(-4) bytes, -1.4% speed and small re-organization of macros);
; ver.04 by spke for LZSA 1.1.0 (26/09/2019, removed usage of IY, added full revision history)
; ver.05 by spke for LZSA 1.1.1 (11/10/2019, 139(-1) bytes, +0.1% speed)
; ver.06 by spke (11-12/04/2021, added some comments)
; ver.07 by spke (04-05/04/2022, 134(-5) bytes, +1% speed, using self-modifying code by default)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
@ -55,8 +57,8 @@
; 3. This notice may not be removed or altered from any source distribution.
;
; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b
; DEFINE HD64180 ; uncomment for systems using Hitachi HD64180
; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b (+5 bytes, -3% speed)
; DEFINE AVOID_SELFMODIFYING_CODE ; uncomment to disallow self-modifying code (-1 byte, -4% speed)
IFNDEF BACKWARD_DECOMPRESS
@ -65,7 +67,7 @@
ENDM
MACRO ADD_OFFSET
ex de,hl : add hl,de
add hl,de
ENDM
MACRO BLOCKCOPY
@ -79,7 +81,10 @@
ENDM
MACRO ADD_OFFSET
push hl : or a : sbc hl,de : pop de ; 11+4+15+10 = 40t / 5 bytes
;push hl : or a : sbc hl,de : pop de ; 11+4+15+10 = 40t / 5 bytes
; HL = DE - HL
ld a,e : sub l : ld l,a
ld a,d : sbc h : ld h,a ; 6*4 = 24t / 6 bytes
ENDM
MACRO BLOCKCOPY
@ -88,85 +93,90 @@
ENDIF
IFNDEF HD64180
@DecompressLZSA2:
; in many places we assume that B = 0
; flag P in A' signals the need to re-load the nibble store
xor a : ld b,a : exa : jr .ReadToken
MACRO LD_IX_DE
ld ixl,e : ld ixh,d
ENDM
.CASE00x: ; token "00Z" stands for 5-bit offsets
; (read a nibble for offset bits 1-4 and use the inverted bit Z
; of the token as bit 0 of the offset; set bits 5-15 of the offset to 1)
push af
call ReadNibble.skipLDCA : ld c,a
pop af
cp %00100000 : rl c : jr .SaveOffset
MACRO LD_DE_IX
ld e,ixl : ld d,ixh
ENDM
.CASE0xx dec b : cp %01000000 : jr c,.CASE00x
.CASE01x: ; token "01Z" stands for 9-bit offsets
; (read a byte for offset bits 0-7 and use the inverted bit Z
; for bit 8 of the offset; set bits 9-15 of the offset to 1)
cp %01100000
.doRLB rl b
.OffsetReadC: ld c,(hl) : NEXT_HL
IFNDEF AVOID_SELFMODIFYING_CODE
.SaveOffset: ld (.PrevOffset),bc : ld b,0
ELSE
MACRO LD_IX_DE
push de : pop ix
ENDM
MACRO LD_DE_IX
push ix : pop de
ENDM
.SaveOffset: push bc : pop ix : ld b,0
ENDIF
@DecompressLZSA2:
xor a : ld b,a : exa : jr ReadToken
.MatchLen: and %00000111 : add 2 : cp 9
call z,ExtendedCode
CASE00x: call ReadNibble
ld e,a : ld a,c
cp %00100000 : rl e : jr SaveOffset
.CopyMatch: ld c,a
push hl ; BC = len, DE = dest, HL = -offset, SP -> [src]
CASE0xx ld d,#FF : cp %01000000 : jr c,CASE00x
IFNDEF AVOID_SELFMODIFYING_CODE
.PrevOffset EQU $+1 : ld hl,0
ELSE
push ix : pop hl
ENDIF
ADD_OFFSET
BLOCKCOPY ; BC = 0, DE = dest
pop hl ; HL = src
CASE01x: cp %01100000 : rl d
.ReadToken: ld a,(hl) : NEXT_HL : push af
and %00011000 : jr z,.NoLiterals
OffsetReadE: ld e,(hl) : NEXT_HL
SaveOffset: LD_IX_DE
rrca : rrca : rrca
call pe,ExtendedCode
MatchLen: and %00000111 : add 2 : cp 9 : call z,ExtendedCode
ld c,a
BLOCKCOPY
CopyMatch: ld c,a
ex (sp),hl ; BC = len, DE = -offset, HL = dest, SP -> [src]
ADD_OFFSET ; BC = len, DE = dest, HL = dest+(-offset), SP -> [src]
BLOCKCOPY ; BC = 0, DE = dest
pop hl ; HL = src
.NoLiterals: pop af : or a : jp p,.CASE0xx
ReadToken: ld a,(hl) : NEXT_HL : push af
and %00011000 : jr z,NoLiterals
.CASE1xx cp %11000000 : jr c,.CASE10x
; token "111" stands for repeat offsets
; (reuse the offset value of the previous match command)
cp %11100000 : jr nc,.MatchLen
rrca : rrca : rrca
call pe,ExtendedCode
.CASE110: ; token "110" stands for 16-bit offset
; (read a byte for offset bits 8-15, then another byte for offset bits 0-7)
ld b,(hl) : NEXT_HL : jr .OffsetReadC
ld c,a
BLOCKCOPY
.CASE10x: ; token "10Z" stands for 13-bit offsets
; (read a nibble for offset bits 9-12 and use the inverted bit Z
; for bit 8 of the offset, then read a byte for offset bits 0-7.
; set bits 13-15 of the offset to 1. subtract 512 from the offset to get the final value)
call ReadNibble : ld b,a
ld a,c : cp %10100000
dec b : jr .doRLB
NoLiterals: pop af : push de
or a : jp p,CASE0xx
CASE1xx cp %11000000 : jr nc,CASE11x
CASE10x: call ReadNibble
ld d,a : ld a,c
cp %10100000 ;: rl d
dec d : rl d : DB #CA ; jr OffsetReadE ; #CA is JP Z,.. to skip all commands in CASE110 before jr OffsetReadE
CASE110: ld d,(hl) : NEXT_HL : jr OffsetReadE
CASE11x cp %11100000 : jr c,CASE110
CASE111: LD_DE_IX : jr MatchLen
ExtendedCode: call ReadNibble : inc a : jr z,ExtraByte
sub #F0+1 : add c : ret
ExtraByte ld a,15 : add c : add (hl) : NEXT_HL : ret nc
ld a,(hl) : NEXT_HL
ld b,(hl) : NEXT_HL : ret nz
pop de : pop de ; RET is not needed, because RET from ReadNibble is sufficient
pop bc ; RET is not needed, because RET from ReadNibble is sufficient
ReadNibble: ld c,a : xor a : exa : ret m
UpdateNibble ld a,(hl) : or #F0 : exa
ReadNibble: ld c,a
.skipLDCA xor a : exa : ret m
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
rrca : rrca : rrca : rrca : ret

View File

@ -30,8 +30,10 @@
*
*/
#define _POSIX_C_SOURCE 200808
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include "format.h"
#include "lib.h"

View File

@ -120,7 +120,7 @@ static inline FORCE_INLINE int lzsa_build_match_len_v1(const unsigned char **ppI
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, const int nBlockSize, unsigned char *pOutData, const int nOutDataOffset, const int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
unsigned char *pCurOutData = pOutData + nOutDataOffset;
const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;

View File

@ -44,6 +44,6 @@
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, const int nBlockSize, unsigned char *pOutData, const int nOutDataOffset, const int nBlockMaxSize);
#endif /* _EXPAND_BLOCK_V1_H */

View File

@ -109,7 +109,7 @@ static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, const int nBlockSize, unsigned char *pOutData, const int nOutDataOffset, const int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
unsigned char *pCurOutData = pOutData + nOutDataOffset;
const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
@ -147,7 +147,7 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
}
if (pInBlock < pInBlockEnd) { /* The last token in the block does not include match information */
unsigned char nOffsetMode = token & 0xc0;
const unsigned char nOffsetMode = token & 0xc0;
unsigned int nValue;
switch (nOffsetMode) {

View File

@ -44,6 +44,6 @@
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, const int nBlockSize, unsigned char *pOutData, const int nOutDataOffset, const int nBlockMaxSize);
#endif /* _EXPAND_BLOCK_V2_H */

View File

@ -50,7 +50,7 @@
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block(unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion, const int nFlags) {
int lzsa_decompressor_expand_block(unsigned char *pInBlock, const int nBlockSize, unsigned char *pOutData, const int nOutDataOffset, const int nBlockMaxSize, const int nFormatVersion, const int nFlags) {
int nDecompressedSize;
if (nFlags & LZSA_FLAG_RAW_BACKWARD) {

View File

@ -52,7 +52,7 @@ extern "C" {
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block(unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion, const int nFlags);
int lzsa_decompressor_expand_block(unsigned char *pInBlock, const int nBlockSize, unsigned char *pOutData, const int nOutDataOffset, const int nBlockMaxSize, const int nFormatVersion, const int nFlags);
#ifdef __cplusplus
}

View File

@ -39,9 +39,6 @@
extern "C" {
#endif
/* Forward declaration */
typedef enum _lzsa_status_t lzsa_status_t;
/*-------------- File API -------------- */
/**

View File

@ -60,10 +60,11 @@ int lzsa_get_frame_size(void) {
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes
* @param nFormatVersion version of format to use (1-2)
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion) {
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nFormatVersion) {
if (nMaxFrameDataSize >= 3 && (nFormatVersion == 1 || nFormatVersion == 2)) {
pFrameData[0] = LZSA_ID_0; /* Magic number */
pFrameData[1] = LZSA_ID_1;
@ -146,6 +147,7 @@ int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataS
*
* @param pFrameData data bytes
* @param nFrameDataSize number of bytes to decode
* @param nFormatVersion pointer to returned format version, if successful
*
* @return 0 for success, or -1 for failure
*/

View File

@ -56,10 +56,11 @@ int lzsa_get_frame_size(void);
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes
* @param nFormatVersion version of format to use (1-2)
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion);
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nFormatVersion);
/**
* Encode compressed block frame header
@ -98,6 +99,7 @@ int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataS
*
* @param pFrameData data bytes
* @param nFrameDataSize number of bytes to decode
* @param nFormatVersion pointer to returned format version, if successful
*
* @return 0 for success, or -1 for failure
*/

View File

@ -48,24 +48,6 @@
extern "C" {
#endif
/** High level status for compression and decompression */
typedef enum _lzsa_status_t {
LZSA_OK = 0, /**< Success */
LZSA_ERROR_SRC, /**< Error reading input */
LZSA_ERROR_DST, /**< Error reading output */
LZSA_ERROR_DICTIONARY, /**< Error reading dictionary */
LZSA_ERROR_MEMORY, /**< Out of memory */
/* Compression-specific status codes */
LZSA_ERROR_COMPRESSION, /**< Internal compression error */
LZSA_ERROR_RAW_TOOLARGE, /**< Input is too large to be compressed to a raw block */
LZSA_ERROR_RAW_UNCOMPRESSED, /**< Input is incompressible and raw blocks don't support uncompressed data */
/* Decompression-specific status codes */
LZSA_ERROR_FORMAT, /**< Invalid input format or magic number when decompressing */
LZSA_ERROR_DECOMPRESSION /**< Internal decompression error */
} lzsa_status_t;
/* Compression flags */
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */
@ -78,11 +60,11 @@ typedef enum _lzsa_status_t {
* @param nBufferSize size of buffer in bytes
*/
static inline void lzsa_reverse_buffer(unsigned char *pBuffer, const int nBufferSize) {
int nMidPoint = nBufferSize / 2;
const int nMidPoint = nBufferSize / 2;
int i, j;
for (i = 0, j = nBufferSize - 1; i < nMidPoint; i++, j--) {
unsigned char c = pBuffer[i];
const unsigned char c = pBuffer[i];
pBuffer[i] = pBuffer[j];
pBuffer[j] = c;
}

View File

@ -75,8 +75,8 @@ void divsufsort_destroy(divsufsort_ctx_t *ctx);
/**
* Constructs the suffix array of a given string.
* @param ctx suffix array context
* @param T[0..n-1] The input string.
* @param SA[0..n-1] The output array of suffixes.
* @param T The input string.
* @param SA The output array of suffixes.
* @param n The length of the given string.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/

View File

@ -47,7 +47,7 @@
#define OPT_RAW_BACKWARD 8
#define OPT_STATS 16
#define TOOL_VERSION "1.3.6"
#define TOOL_VERSION "1.4.1"
/*---------------------------------------------------------------------------*/
@ -120,7 +120,7 @@ static int do_compress(const char *pszInFilename, const char *pszOutFilename, co
nStatus = lzsa_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount, &nSafeDist, &stats);
if ((nOptions & OPT_VERBOSE)) {
if (nOptions & OPT_VERBOSE) {
nEndTime = do_get_time();
}
@ -139,7 +139,7 @@ static int do_compress(const char *pszInFilename, const char *pszOutFilename, co
if (nStatus)
return 100;
if ((nOptions & OPT_VERBOSE)) {
if (nOptions & OPT_VERBOSE) {
double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens (%g bytes/token), %lld into %lld bytes ==> %g %%\n",
@ -383,13 +383,13 @@ static int do_compare(const char *pszInFilename, const char *pszOutFilename, con
/*---------------------------------------------------------------------------*/
static void generate_compressible_data(unsigned char *pBuffer, size_t nBufferSize, int nMinMatchSize, unsigned int nSeed, int nNumLiteralValues, float fMatchProbability) {
static void generate_compressible_data(unsigned char *pBuffer, const size_t nBufferSize, const int nMinMatchSize, const unsigned int nSeed, const int nNumLiteralValues, const float fMatchProbability) {
size_t nIndex = 0;
int nMatchProbability = (int)(fMatchProbability * 1023.0f);
const int nMatchProbability = (int)(fMatchProbability * 1023.0f);
srand(nSeed);
if (nIndex >= nBufferSize) return;
if (nBufferSize == 0) return;
pBuffer[nIndex++] = rand() % nNumLiteralValues;
while (nIndex < nBufferSize) {
@ -423,14 +423,12 @@ static void generate_compressible_data(unsigned char *pBuffer, size_t nBufferSiz
}
}
static void xor_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nSeed, float fXorProbability) {
static void xor_data(unsigned char *pBuffer, const size_t nBufferSize, const unsigned int nSeed, const float fXorProbability) {
size_t nIndex = 0;
int nXorProbability = (int)(fXorProbability * 1023.0f);
const int nXorProbability = (const int)(fXorProbability * 1023.0f);
srand(nSeed);
if (nIndex >= nBufferSize) return;
while (nIndex < nBufferSize) {
if ((rand() & 1023) < nXorProbability) {
pBuffer[nIndex] ^= 0xff;
@ -439,7 +437,7 @@ static void xor_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nS
}
}
static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, int nFormatVersion) {
static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, const int nFormatVersion) {
unsigned char *pGeneratedData;
unsigned char *pCompressedData;
unsigned char *pTmpCompressedData;
@ -470,7 +468,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
free(pGeneratedData);
pGeneratedData = NULL;
fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize);
fprintf(stderr, "out of memory, %zu bytes needed\n", nMaxCompressedDataSize);
return 100;
}
@ -481,7 +479,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
free(pGeneratedData);
pGeneratedData = NULL;
fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize);
fprintf(stderr, "out of memory, %zu bytes needed\n", nMaxCompressedDataSize);
return 100;
}
@ -514,7 +512,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
for (nGeneratedDataSize = 1024; nGeneratedDataSize <= ((size_t)((nOptions & OPT_RAW) ? BLOCK_SIZE : (4 * BLOCK_SIZE))); nGeneratedDataSize += nDataSizeStep) {
float fMatchProbability;
fprintf(stdout, "size %zd", nGeneratedDataSize);
fprintf(stdout, "size %zu", nGeneratedDataSize);
for (fMatchProbability = 0; fMatchProbability <= 0.995f; fMatchProbability += fProbabilitySizeStep) {
int nNumLiteralValues[12] = { 1, 2, 3, 15, 30, 56, 96, 137, 178, 191, 255, 256 };
float fXorProbability;
@ -529,7 +527,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
/* Try to compress it, expected to succeed */
size_t nActualCompressedSize = lzsa_compress_inmem(pGeneratedData, pCompressedData, nGeneratedDataSize, lzsa_get_max_compressed_size_inmem(nGeneratedDataSize),
nFlags, nMinMatchSize, nFormatVersion);
if (nActualCompressedSize == -1 || (int)nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) {
if (nActualCompressedSize == (size_t)-1 || (int)nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) {
free(pTmpDecompressedData);
pTmpDecompressedData = NULL;
free(pTmpCompressedData);
@ -539,7 +537,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
free(pGeneratedData);
pGeneratedData = NULL;
fprintf(stderr, "\nself-test: error compressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
fprintf(stderr, "\nself-test: error compressing size %zu, seed %u, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
return 100;
}
@ -547,7 +545,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
size_t nActualDecompressedSize;
int nDecFormatVersion = nFormatVersion;
nActualDecompressedSize = lzsa_decompress_inmem(pCompressedData, pTmpDecompressedData, nActualCompressedSize, nGeneratedDataSize, nFlags, &nDecFormatVersion);
if (nActualDecompressedSize == -1) {
if (nActualDecompressedSize == (size_t)-1) {
free(pTmpDecompressedData);
pTmpDecompressedData = NULL;
free(pTmpCompressedData);
@ -557,7 +555,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
free(pGeneratedData);
pGeneratedData = NULL;
fprintf(stderr, "\nself-test: error decompressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
fprintf(stderr, "\nself-test: error decompressing size %zu, seed %u, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
return 100;
}
@ -571,7 +569,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
free(pGeneratedData);
pGeneratedData = NULL;
fprintf(stderr, "\nself-test: error comparing decompressed and original data, size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
fprintf(stderr, "\nself-test: error comparing decompressed and original data, size %zu, seed %u, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
return 100;
}
@ -651,7 +649,7 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
pFileData = (unsigned char*)malloc(nFileSize);
if (!pFileData) {
fclose(f_in);
fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize);
fprintf(stderr, "out of memory for reading '%s', %zu bytes needed\n", pszInFilename, nFileSize);
return 100;
}
@ -671,7 +669,7 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
pCompressedData = (unsigned char*)malloc(nMaxCompressedSize + 2048);
if (!pCompressedData) {
free(pFileData);
fprintf(stderr, "out of memory for compressing '%s', %zd bytes needed\n", pszInFilename, nMaxCompressedSize);
fprintf(stderr, "out of memory for compressing '%s', %zu bytes needed\n", pszInFilename, nMaxCompressedSize);
return 100;
}
@ -693,7 +691,7 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
long long t0 = do_get_time();
nActualCompressedSize = lzsa_compress_inmem(pFileData, pCompressedData + 1024, nFileSize, nRightGuardPos, nFlags, nMinMatchSize, nFormatVersion);
long long t1 = do_get_time();
if (nActualCompressedSize == -1) {
if (nActualCompressedSize == (size_t)-1) {
free(pCompressedData);
free(pFileData);
fprintf(stderr, "compression error\n");
@ -701,7 +699,7 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
}
long long nCurDecTime = t1 - t0;
if (nBestCompTime == -1 || nBestCompTime > nCurDecTime)
if (nBestCompTime == (size_t)-1 || nBestCompTime > nCurDecTime)
nBestCompTime = nCurDecTime;
/* Check guard bytes before the output buffer */
@ -742,7 +740,7 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
free(pCompressedData);
free(pFileData);
fprintf(stdout, "compressed size: %zd bytes\n", nActualCompressedSize);
fprintf(stdout, "compressed size: %zu bytes\n", nActualCompressedSize);
fprintf(stdout, "compression time: %lld microseconds (%g Mb/s)\n", nBestCompTime, ((double)nActualCompressedSize / 1024.0) / ((double)nBestCompTime / 1000.0));
return 0;
@ -783,7 +781,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
pFileData = (unsigned char*)malloc(nFileSize);
if (!pFileData) {
fclose(f_in);
fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize);
fprintf(stderr, "out of memory for reading '%s', %zu bytes needed\n", pszInFilename, nFileSize);
return 100;
}
@ -802,7 +800,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
nMaxDecompressedSize = 65536;
else
nMaxDecompressedSize = lzsa_get_max_decompressed_size_inmem(pFileData, nFileSize);
if (nMaxDecompressedSize == -1) {
if (nMaxDecompressedSize == (size_t)-1) {
free(pFileData);
fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
return 100;
@ -811,7 +809,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
pDecompressedData = (unsigned char*)malloc(nMaxDecompressedSize);
if (!pDecompressedData) {
free(pFileData);
fprintf(stderr, "out of memory for decompressing '%s', %zd bytes needed\n", pszInFilename, nMaxDecompressedSize);
fprintf(stderr, "out of memory for decompressing '%s', %zu bytes needed\n", pszInFilename, nMaxDecompressedSize);
return 100;
}
@ -824,7 +822,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
long long t0 = do_get_time();
nActualDecompressedSize = lzsa_decompress_inmem(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, nFlags, &nFormatVersion);
long long t1 = do_get_time();
if (nActualDecompressedSize == -1) {
if (nActualDecompressedSize == (size_t)-1) {
free(pDecompressedData);
free(pFileData);
fprintf(stderr, "decompression error\n");
@ -832,7 +830,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
}
long long nCurDecTime = t1 - t0;
if (nBestDecTime == -1 || nBestDecTime > nCurDecTime)
if (nBestDecTime == (size_t)-1 || nBestDecTime > nCurDecTime)
nBestDecTime = nCurDecTime;
}
@ -852,7 +850,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
free(pFileData);
fprintf(stdout, "format: LZSA%d\n", nFormatVersion);
fprintf(stdout, "decompressed size: %zd bytes\n", nActualDecompressedSize);
fprintf(stdout, "decompressed size: %zu bytes\n", nActualDecompressedSize);
fprintf(stdout, "decompression time: %lld microseconds (%g Mb/s)\n", nBestDecTime, ((double)nActualDecompressedSize / 1024.0) / ((double)nBestDecTime / 1000.0));
return 0;
@ -1037,11 +1035,11 @@ int main(int argc, char **argv) {
nArgsError = 1;
}
else if (!strcmp(argv[i], "-stats")) {
if ((nOptions & OPT_STATS) == 0) {
nOptions |= OPT_STATS;
}
else
nArgsError = 1;
if ((nOptions & OPT_STATS) == 0) {
nOptions |= OPT_STATS;
}
else
nArgsError = 1;
}
else {
if (!pszInFilename)

View File

@ -33,7 +33,6 @@
#include <string.h>
#include "matchfinder.h"
#include "format.h"
#include "lib.h"
/**
* Hash index into TAG_BITS
@ -77,7 +76,7 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
PLCP[i] = 0;
continue;
}
int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);
const int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);
while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++;
PLCP[i] = nCurLen;
if (nCurLen > 0)
@ -88,11 +87,11 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
* saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method,
* and the interval builder below doesn't need it either. */
intervals[0] &= POS_MASK;
int nMinMatchSize = pCompressor->min_match_size;
const int nMinMatchSize = pCompressor->min_match_size;
if (pCompressor->format_version >= 2) {
for (i = 1; i < nInWindowSize; i++) {
int nIndex = (int)(intervals[i] & POS_MASK);
const int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
if (nLen < nMinMatchSize)
nLen = 0;
@ -106,7 +105,7 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
}
else {
for (i = 1; i < nInWindowSize; i++) {
int nIndex = (int)(intervals[i] & POS_MASK);
const int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
if (nLen < nMinMatchSize)
nLen = 0;
@ -196,14 +195,15 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
*
* @return number of matches
*/
int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches, const int nInWindowSize) {
static int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches, const int nInWindowSize) {
unsigned int *intervals = pCompressor->intervals;
unsigned int *pos_data = pCompressor->pos_data;
unsigned int ref;
unsigned int super_ref;
unsigned int match_pos;
lzsa_match *matchptr;
int nPrevOffset = 0;
unsigned int nPrevOffset = 0;
unsigned char nV1OffsetFound[2] = { 0, 0 };
/**
* Find matches using intervals
@ -240,11 +240,11 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
const unsigned int nMatchOffset = (const unsigned int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET) {
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
matchptr->offset = (unsigned short)nMatchOffset;
matchptr->length = (const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
matchptr->offset = (const unsigned short)nMatchOffset;
matchptr++;
nPrevOffset = nMatchOffset;
@ -258,11 +258,11 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
const unsigned int nMatchOffset = (const unsigned int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
matchptr->length = ((unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
matchptr->offset = (unsigned short)nMatchOffset;
if (nMatchOffset <= MAX_OFFSET) {
matchptr->length = ((const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
matchptr->offset = (const unsigned short)nMatchOffset;
matchptr++;
nPrevOffset = nMatchOffset;
@ -277,17 +277,30 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
pos_data[match_pos] = ref;
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
const unsigned int nMatchOffset = (const unsigned int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
if (pCompressor->format_version >= 2) {
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
matchptr->length = (const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
matchptr->offset = (const unsigned short)nMatchOffset;
matchptr++;
nPrevOffset = nMatchOffset;
}
else {
matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
unsigned int nV1OffsetType = (nMatchOffset <= 256) ? 0 : 1;
if (!nV1OffsetFound[nV1OffsetType]) {
matchptr->length = (const unsigned short)(ref >> LCP_SHIFT);
matchptr->offset = (const unsigned short)nMatchOffset;
if (matchptr->length < 256)
nV1OffsetFound[nV1OffsetType] = 1;
matchptr++;
nPrevOffset = nMatchOffset;
}
}
matchptr->offset = (unsigned short)nMatchOffset;
matchptr++;
}
}
@ -298,13 +311,14 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
const unsigned int nMatchOffset = (const unsigned int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
matchptr->length = ((unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
matchptr->offset = (unsigned short)nMatchOffset;
if (nMatchOffset <= MAX_OFFSET) {
const unsigned short nMatchLen = ((const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS)));
if ((matchptr->length & 0x7fff) > 2) {
if (nMatchLen > 2) {
matchptr->length = nMatchLen | 0x8000;
matchptr->offset = (const unsigned short)nMatchOffset;
matchptr++;
nPrevOffset = nMatchOffset;
@ -348,12 +362,10 @@ void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nMatchesPerOf
int i;
for (i = nStartOffset; i < nEndOffset; i++) {
int nMatches = lzsa_find_matches_at(pCompressor, i, pMatch, nMatchesPerOffset, nEndOffset - nStartOffset);
const int nMatches = lzsa_find_matches_at(pCompressor, i, pMatch, nMatchesPerOffset, nEndOffset - nStartOffset);
while (nMatches < nMatchesPerOffset) {
pMatch[nMatches].length = 0;
pMatch[nMatches].offset = 0;
nMatches++;
if (nMatches < nMatchesPerOffset) {
memset(pMatch + nMatches, 0, (nMatchesPerOffset - nMatches) * sizeof(lzsa_match));
}
pMatch += nMatchesPerOffset;

View File

@ -33,14 +33,12 @@
#ifndef _MATCHFINDER_H
#define _MATCHFINDER_H
#include "shrink_context.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declarations */
typedef struct _lzsa_match lzsa_match;
typedef struct _lzsa_compressor lzsa_compressor;
/**
* Parse input data, build suffix array and overlaid data structures to speed up match finding
*
@ -52,19 +50,6 @@ typedef struct _lzsa_compressor lzsa_compressor;
*/
int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
/**
* Find matches at the specified offset in the input window
*
* @param pCompressor compression context
* @param nOffset offset to find matches at, in the input window
* @param pMatches pointer to returned matches
* @param nMaxMatches maximum number of matches to return (0 for none)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
*
* @return number of matches
*/
int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches, const int nInWindowSize);
/**
* Skip previously compressed bytes
*

View File

@ -67,7 +67,7 @@ static inline int lzsa_get_literals_varlen_size_v1(const int nLength) {
* @param nOutOffset current write index into output buffer
* @param nLength literals length
*/
static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, const int nLength) {
if (nLength >= LITERALS_RUN_LEN_V1) {
if (nLength < 256)
pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN_V1;
@ -118,7 +118,7 @@ static inline int lzsa_get_match_varlen_size_v1(const int nLength) {
* @param nOutOffset current write index into output buffer
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
*/
static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, const int nLength) {
if (nLength >= MATCH_RUN_LEN_V1) {
if ((nLength + MIN_MATCH_SIZE_V1) < 256)
pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN_V1;
@ -141,13 +141,11 @@ static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOf
/**
* Get offset encoding cost in bits
*
* @param nMatchOffset offset to get cost of
* @param __nMatchOffset offset to get cost of
*
* @return cost in bits
*/
static inline int lzsa_get_offset_cost_v1(const unsigned int nMatchOffset) {
   /* Offsets above 256 are costed at 16 bits; everything else at 8 */
   if (nMatchOffset > 256)
      return 16;

   return 8;
}
/* Macro form of the offset cost in bits: 16 when the offset exceeds 256, 8 otherwise. The argument is expanded exactly once. */
#define lzsa_get_offset_cost_v1(__nMatchOffset) (((__nMatchOffset) > 256) ? 16 : 8)
/**
* Attempt to pick optimal matches using a forward arrivals parser, so as to produce the smallest possible output that decompresses to the same input
@ -155,87 +153,93 @@ static inline int lzsa_get_offset_cost_v1(const unsigned int nMatchOffset) {
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
* @param nReduce non-zero to reduce the number of tokens when the path costs are equal, zero not to
*/
static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int nReduce) {
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT);
static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset, const int nReduce) {
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1);
const int nMinMatchSize = pCompressor->min_match_size;
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
const int nModeSwitchPenalty = nFavorRatio ? 0 : MODESWITCH_PENALTY;
const int nDisableScore = nReduce ? 0 : (2 * BLOCK_SIZE);
int i, j, n;
int i;
if ((nEndOffset - nStartOffset) > BLOCK_SIZE) return;
memset(arrival + (nStartOffset << ARRIVALS_PER_POSITION_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset + 1) << ARRIVALS_PER_POSITION_SHIFT));
for (i = (nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1); i != ((nEndOffset + 1) << ARRIVALS_PER_POSITION_SHIFT_V1); i += NARRIVALS_PER_POSITION_V1) {
lzsa_arrival* cur_arrival = &arrival[i];
int j;
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].from_slot = -1;
memset(cur_arrival, 0, sizeof(lzsa_arrival) * NARRIVALS_PER_POSITION_V1);
for (j = 0; j < NARRIVALS_PER_POSITION_V1; j++)
cur_arrival[j].cost = 0x40000000;
}
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1].cost = 0;
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1].from_slot = -1;
for (i = nStartOffset; i != nEndOffset; i++) {
lzsa_arrival* cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT];
int m;
lzsa_arrival* cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT_V1];
lzsa_arrival* pDestLiteralSlots = &cur_arrival[1 << ARRIVALS_PER_POSITION_SHIFT_V1];
int j, m;
for (j = 0; j < NARRIVALS_PER_POSITION_V1 && cur_arrival[j].from_slot; j++) {
int nPrevCost = cur_arrival[j].cost;
const int nPrevCost = cur_arrival[j].cost;
int nCodingChoiceCost = nPrevCost + 8 /* literal */;
int nScore = cur_arrival[j].score + 1;
int nNumLiterals = cur_arrival[j].num_literals + 1;
if (nNumLiterals == LITERALS_RUN_LEN_V1 || nNumLiterals == 256 || nNumLiterals == 512) {
nCodingChoiceCost += 8;
}
const int nScore = cur_arrival[j].score + 1;
const int nNumLiterals = cur_arrival[j].num_literals + 1;
int n;
if (nNumLiterals == 1)
nCodingChoiceCost += nModeSwitchPenalty;
else if (nNumLiterals == LITERALS_RUN_LEN_V1 || nNumLiterals == 256 || nNumLiterals == 512) {
nCodingChoiceCost += 8;
}
lzsa_arrival *pDestSlots = &arrival[(i + 1) << ARRIVALS_PER_POSITION_SHIFT];
for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &pDestSlots[n];
if (pDestArrival->from_slot == 0 ||
nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
memmove(&arrival[((i + 1) << ARRIVALS_PER_POSITION_SHIFT) + n + 1],
&arrival[((i + 1) << ARRIVALS_PER_POSITION_SHIFT) + n],
if (nCodingChoiceCost < pDestLiteralSlots[n].cost ||
(nCodingChoiceCost == pDestLiteralSlots[n].cost && nScore < (pDestLiteralSlots[n].score + nDisableScore))) {
memmove(&pDestLiteralSlots[n + 1],
&pDestLiteralSlots[n],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1));
lzsa_arrival* pDestArrival = &pDestLiteralSlots[n];
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->rep_offset = cur_arrival[j].rep_offset;
pDestArrival->from_slot = j + 1;
pDestArrival->from_pos = i - nStartOffset;
pDestArrival->match_len = 0;
pDestArrival->num_literals = nNumLiterals;
pDestArrival->score = nScore;
pDestArrival->rep_offset = cur_arrival[j].rep_offset;
break;
}
}
}
const lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V1);
int nNumArrivalsForThisPos = j;
const int nNumArrivalsForThisPos = j;
for (m = 0; m < NMATCHES_PER_INDEX_V1 && match[m].length; m++) {
int nMatchLen = match[m].length;
int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset);
int nStartingMatchLen, k;
if (nNumArrivalsForThisPos != 0) {
for (m = 0; m < NMATCHES_PER_INDEX_V1 && match[m].length; m++) {
int nMatchLen = match[m].length;
const int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset);
int nStartingMatchLen, k;
if ((i + nMatchLen) > nEndOffset)
nMatchLen = nEndOffset - i;
if ((i + nMatchLen) > nEndOffset)
nMatchLen = nEndOffset - i;
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
nStartingMatchLen = nMatchLen;
else
nStartingMatchLen = nMinMatchSize;
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
nStartingMatchLen = nMatchLen;
else
nStartingMatchLen = nMinMatchSize;
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
const int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
lzsa_arrival *pDestSlots = &arrival[(i + k) << ARRIVALS_PER_POSITION_SHIFT];
lzsa_arrival* pDestSlots = &cur_arrival[k << ARRIVALS_PER_POSITION_SHIFT_V1];
int nCodingChoiceCost = cur_arrival[0].cost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
int exists = 0, n;
for (j = 0; j < nNumArrivalsForThisPos; j++) {
int nPrevCost = cur_arrival[j].cost;
int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
int exists = 0;
if (!cur_arrival[j].num_literals)
if (!cur_arrival[0].num_literals)
nCodingChoiceCost += nModeSwitchPenalty;
for (n = 0;
@ -248,28 +252,21 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
}
if (!exists) {
int nScore = cur_arrival[j].score + 5;
const int nScore = cur_arrival[0].score + 5;
for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &pDestSlots[n];
if (nCodingChoiceCost < pDestSlots[0].cost ||
(nCodingChoiceCost == pDestSlots[0].cost && nScore < (pDestSlots[0].score + nDisableScore))) {
memmove(&pDestSlots[1],
&pDestSlots[0],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - 1));
if (pDestArrival->from_slot == 0 ||
nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
memmove(&pDestSlots[n + 1],
&pDestSlots[n],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1));
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->from_slot = j + 1;
pDestArrival->match_len = k;
pDestArrival->num_literals = 0;
pDestArrival->score = nScore;
pDestArrival->rep_offset = match[m].offset;
j = NARRIVALS_PER_POSITION_V1;
break;
}
pDestSlots->cost = nCodingChoiceCost;
pDestSlots->rep_offset = match[m].offset;
pDestSlots->from_slot = 1;
pDestSlots->from_pos = i - nStartOffset;
pDestSlots->match_len = k;
pDestSlots->num_literals = 0;
pDestSlots->score = nScore;
}
}
}
@ -277,17 +274,13 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
}
}
lzsa_arrival *end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT) + 0];
const lzsa_arrival *end_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT_V1];
lzsa_match *pBestMatch = pCompressor->best_match - nStartOffset;
while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0) {
if (end_arrival->from_pos >= nEndOffset) return;
pBestMatch[end_arrival->from_pos].length = end_arrival->match_len;
if (end_arrival->match_len)
pBestMatch[end_arrival->from_pos].offset = end_arrival->rep_offset;
else
pBestMatch[end_arrival->from_pos].offset = 0;
end_arrival = &arrival[(end_arrival->from_pos << ARRIVALS_PER_POSITION_SHIFT) + (end_arrival->from_slot - 1)];
while (end_arrival->from_slot > 0 && (end_arrival->from_pos + nStartOffset) < nEndOffset) {
pBestMatch[end_arrival->from_pos + nStartOffset].length = end_arrival->match_len;
pBestMatch[end_arrival->from_pos + nStartOffset].offset = (end_arrival->match_len) ? end_arrival->rep_offset: 0;
end_arrival = &arrival[((end_arrival->from_pos + nStartOffset) << ARRIVALS_PER_POSITION_SHIFT_V1) + (end_arrival->from_slot - 1)];
}
}
@ -297,13 +290,13 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param pBestMatch optimal matches to emit
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*
* @return non-zero if the number of tokens was reduced, 0 if it wasn't
*/
static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) {
static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset) {
lzsa_match *pBestMatch = pCompressor->best_match - nStartOffset;
int i;
int nNumLiterals = 0;
int nDidReduce = 0;
@ -319,8 +312,8 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un
i >= pBestMatch[i + 1].offset &&
(i + pBestMatch[i + 1].length + 1) <= nEndOffset &&
!memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
int nCurLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V1);
int nReducedLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V1);
const int nCurLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V1);
const int nReducedLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V1);
if ((nReducedLenSize - nCurLenSize) <= 8) {
/* Merge */
@ -349,7 +342,7 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un
8 /* token */ + lzsa_get_literals_varlen_size_v1(nNextLiterals)) >=
(8 /* token */ + (pMatch->length << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + pMatch->length + nNextLiterals))) {
/* Reduce */
int nMatchLen = pMatch->length;
const int nMatchLen = pMatch->length;
int j;
for (j = 0; j < nMatchLen; j++) {
@ -361,31 +354,31 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un
}
}
if ((i + pMatch->length) <= nEndOffset && pMatch->offset > 0 && pMatch->length >= MIN_MATCH_SIZE_V1 &&
pBestMatch[i + pMatch->length].offset > 0 &&
if ((i + pMatch->length) < nEndOffset && pMatch->offset && pMatch->length >= MIN_MATCH_SIZE_V1 &&
pBestMatch[i + pMatch->length].offset &&
pBestMatch[i + pMatch->length].length >= MIN_MATCH_SIZE_V1 &&
(pMatch->length + pBestMatch[i + pMatch->length].length) >= LEAVE_ALONE_MATCH_SIZE &&
(pMatch->length + pBestMatch[i + pMatch->length].length) <= MAX_VARLEN &&
(i + pMatch->length) > pMatch->offset &&
(i + pMatch->length) > pBestMatch[i + pMatch->length].offset &&
(i + pMatch->length) >= pMatch->offset &&
(i + pMatch->length) >= pBestMatch[i + pMatch->length].offset &&
(i + pMatch->length + pBestMatch[i + pMatch->length].length) <= nEndOffset &&
!memcmp(pInWindow + i - pMatch->offset + pMatch->length,
pInWindow + i + pMatch->length - pBestMatch[i + pMatch->length].offset,
pBestMatch[i + pMatch->length].length)) {
int nCurPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1);
nCurPartialSize += 8 /* token */ + lzsa_get_literals_varlen_size_v1(0) + ((pBestMatch[i + pMatch->length].offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
nCurPartialSize += 8 /* token */ + /* lzsa_get_literals_varlen_size_v1(0) + */ ((pBestMatch[i + pMatch->length].offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
int nReducedPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
const int nReducedPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
if (nCurPartialSize >= nReducedPartialSize) {
int nMatchLen = pMatch->length;
const int nMatchLen = pMatch->length;
/* Join */
pMatch->length += pBestMatch[i + nMatchLen].length;
pBestMatch[i + nMatchLen].length = 0;
pBestMatch[i + nMatchLen].offset = 0;
pBestMatch[i + nMatchLen].length = -1;
nDidReduce = 1;
continue;
}
}
@ -402,60 +395,10 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un
return nDidReduce;
}
/**
 * Compute the size a block would take once encoded, without writing anything
 *
 * Walks the chosen matches from nStartOffset to nEndOffset. Every position whose
 * match length is below MIN_MATCH_SIZE_V1 is counted as a literal; every other
 * position closes the pending literal run with a match command and skips ahead by
 * the match length. The total is accumulated in bits (a token counts as 8, each
 * literal byte as 8, a match offset as 8 or 16).
 *
 * @param pCompressor compression context (only its flags are read)
 * @param pBestMatch optimal matches to emit, indexed by window offset
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 *
 * @return accumulated size of the compressed data that would be written to the output buffer
 */
static int lzsa_get_compressed_size_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) {
   int nTotalSize = 0;
   int nPendingLiterals = 0;
   int nPos = nStartOffset;

   while (nPos < nEndOffset) {
      const lzsa_match *pCur = &pBestMatch[nPos];

      if (pCur->length < MIN_MATCH_SIZE_V1) {
         /* No usable match here: this byte joins the pending literal run */
         nPendingLiterals++;
         nPos++;
         continue;
      }

      const int nCurOffset = pCur->offset;
      const int nCurLen = pCur->length;
      const int nOffsetSize = (nCurOffset <= 256) ? 8 : 16;

      /* token + literals run length + literal bytes + match offset + match length */
      nTotalSize += 8
                  + lzsa_get_literals_varlen_size_v1(nPendingLiterals)
                  + (nPendingLiterals << 3)
                  + nOffsetSize
                  + lzsa_get_match_varlen_size_v1(nCurLen - MIN_MATCH_SIZE_V1);

      nPendingLiterals = 0;
      nPos += nCurLen;
   }

   /* Closing command: one token plus whatever literals are still pending */
   nTotalSize += 8
               + lzsa_get_literals_varlen_size_v1(nPendingLiterals)
               + (nPendingLiterals << 3);

   /* Raw blocks account for an extra 8 * 4 */
   if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
      nTotalSize += 8 * 4;

   return nTotalSize;
}
/**
* Emit block of compressed data
*
* @param pCompressor compression context
* @param pBestMatch optimal matches to emit
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
@ -464,7 +407,8 @@ static int lzsa_get_compressed_size_v1(lzsa_compressor *pCompressor, lzsa_match
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
const lzsa_match *pBestMatch = pCompressor->best_match - nStartOffset;
int i;
int nNumLiterals = 0;
int nInFirstLiteralOffset = 0;
@ -474,13 +418,13 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMa
const lzsa_match *pMatch = pBestMatch + i;
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
int nMatchOffset = pMatch->offset;
int nMatchLen = pMatch->length;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen;
int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
const int nMatchOffset = pMatch->offset;
const int nMatchLen = pMatch->length;
const int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
const int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen;
const int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
@ -542,7 +486,7 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMa
i += nMatchLen;
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
int nCurSafeDist = (i - nStartOffset) - nOutOffset;
const int nCurSafeDist = (i - nStartOffset) - nOutOffset;
if (nCurSafeDist >= 0 && pCompressor->safe_dist < nCurSafeDist)
pCompressor->safe_dist = nCurSafeDist;
}
@ -558,8 +502,8 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMa
}
{
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
@ -567,7 +511,7 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMa
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
else
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) /* | 0x00 */;
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1)
@ -580,11 +524,10 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMa
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
int nCurSafeDist = (i - nStartOffset) - nOutOffset;
const int nCurSafeDist = (i - nStartOffset) - nOutOffset;
if (nCurSafeDist >= 0 && pCompressor->safe_dist < nCurSafeDist)
pCompressor->safe_dist = nCurSafeDist;
}
@ -620,11 +563,11 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMa
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
int nNumLiterals = nEndOffset - nStartOffset;
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
const int nNumLiterals = nEndOffset - nStartOffset;
const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nOutOffset = 0;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + 4;
const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + 4;
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
@ -636,7 +579,6 @@ static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, co
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nStartOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
pCompressor->num_commands++;
@ -664,45 +606,28 @@ static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, co
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
int nResult, nBaseCompressedSize;
int nResult;
/* Compress optimally without breaking ties in favor of less tokens */
memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
lzsa_optimize_forward_v1(pCompressor, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */);
if (nInDataSize < 65536) {
lzsa_optimize_forward_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */);
}
else {
lzsa_optimize_forward_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */);
}
int nDidReduce;
int nPasses = 0;
do {
nDidReduce = lzsa_optimize_command_count_v1(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nDidReduce = lzsa_optimize_command_count_v1(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nPasses++;
} while (nDidReduce && nPasses < 20);
nBaseCompressedSize = lzsa_get_compressed_size_v1(pCompressor, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
lzsa_match *pBestMatch = pCompressor->best_match - nPreviousBlockSize;
if (nBaseCompressedSize > 0 && nInDataSize < 65536) {
int nReducedCompressedSize;
/* Compress optimally and do break ties in favor of less tokens */
memset(pCompressor->improved_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
lzsa_optimize_forward_v1(pCompressor, pCompressor->improved_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */);
nPasses = 0;
do {
nDidReduce = lzsa_optimize_command_count_v1(pCompressor, pInWindow, pCompressor->improved_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nPasses++;
} while (nDidReduce && nPasses < 20);
nReducedCompressedSize = lzsa_get_compressed_size_v1(pCompressor, pCompressor->improved_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
if (nReducedCompressedSize > 0 && nReducedCompressedSize <= nBaseCompressedSize) {
/* Pick the parse with the reduced number of tokens as it didn't negatively affect the size */
pBestMatch = pCompressor->improved_match - nPreviousBlockSize;
}
}
nResult = lzsa_write_block_v1(pCompressor, pBestMatch, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
if (nResult < 0 && pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
nResult = lzsa_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
if (nResult < 0 && (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)) {
nResult = lzsa_write_raw_uncompressed_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
}

View File

@ -33,8 +33,11 @@
#ifndef _SHRINK_BLOCK_V1_H
#define _SHRINK_BLOCK_V1_H
/* Forward declarations */
typedef struct _lzsa_compressor lzsa_compressor;
#include "shrink_context.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* Select the optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
@ -50,4 +53,8 @@ typedef struct _lzsa_compressor lzsa_compressor;
*/
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
#ifdef __cplusplus
}
#endif
#endif /* _SHRINK_BLOCK_V1_H */

File diff suppressed because it is too large Load Diff

View File

@ -33,8 +33,11 @@
#ifndef _SHRINK_BLOCK_V2_H
#define _SHRINK_BLOCK_V2_H
/* Forward declarations */
typedef struct _lzsa_compressor lzsa_compressor;
#include "shrink_context.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* Select the optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
@ -50,4 +53,8 @@ typedef struct _lzsa_compressor lzsa_compressor;
*/
int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
#ifdef __cplusplus
}
#endif
#endif /* _SHRINK_BLOCK_V2_H */

View File

@ -45,14 +45,15 @@
* @param pCompressor compression context to initialize
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
* @param nFormatVersion version of format to use (1-2)
* @param nFlags compression flags
*
* @return 0 for success, non-zero for failure
*/
int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags) {
int nResult;
int nMinMatchSizeForFormat = (nFormatVersion == 1) ? MIN_MATCH_SIZE_V1 : MIN_MATCH_SIZE_V2;
int nMaxMinMatchForFormat = (nFormatVersion == 1) ? 5 : 3;
const int nMinMatchSizeForFormat = (nFormatVersion == 1) ? MIN_MATCH_SIZE_V1 : MIN_MATCH_SIZE_V2;
const int nMaxMinMatchForFormat = (nFormatVersion == 1) ? 5 : 3;
nResult = divsufsort_init(&pCompressor->divsufsort_context);
pCompressor->intervals = NULL;
@ -60,12 +61,12 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->open_intervals = NULL;
pCompressor->match = NULL;
pCompressor->best_match = NULL;
pCompressor->improved_match = NULL;
pCompressor->arrival = NULL;
pCompressor->rep_slot_handled_mask = NULL;
pCompressor->rep_len_handled_mask = NULL;
pCompressor->first_offset_for_byte = NULL;
pCompressor->next_offset_for_pos = NULL;
pCompressor->offset_cache = NULL;
pCompressor->min_match_size = nMinMatchSize;
if (pCompressor->min_match_size < nMinMatchSizeForFormat)
pCompressor->min_match_size = nMinMatchSizeForFormat;
@ -93,38 +94,37 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->open_intervals = (unsigned int *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned int));
if (pCompressor->open_intervals) {
pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << ARRIVALS_PER_POSITION_SHIFT) * sizeof(lzsa_arrival));
pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << ARRIVALS_PER_POSITION_SHIFT_V2) * sizeof(lzsa_arrival));
if (pCompressor->arrival) {
pCompressor->best_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match));
if (pCompressor->best_match) {
pCompressor->improved_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match));
if (pCompressor->improved_match) {
if (pCompressor->format_version == 2)
pCompressor->match = (lzsa_match *)malloc(BLOCK_SIZE * NMATCHES_PER_INDEX_V2 * sizeof(lzsa_match));
else
pCompressor->match = (lzsa_match *)malloc(BLOCK_SIZE * NMATCHES_PER_INDEX_V1 * sizeof(lzsa_match));
if (pCompressor->match) {
if (pCompressor->format_version == 2) {
pCompressor->rep_slot_handled_mask = (char*)malloc(NARRIVALS_PER_POSITION_V2_BIG * ((LCP_MAX + 1) / 8) * sizeof(char));
if (pCompressor->rep_slot_handled_mask) {
pCompressor->rep_len_handled_mask = (char*)malloc(((LCP_MAX + 1) / 8) * sizeof(char));
if (pCompressor->rep_len_handled_mask) {
pCompressor->first_offset_for_byte = (int*)malloc(65536 * sizeof(int));
if (pCompressor->first_offset_for_byte) {
pCompressor->next_offset_for_pos = (int*)malloc(BLOCK_SIZE * sizeof(int));
if (pCompressor->next_offset_for_pos) {
if (pCompressor->format_version == 2)
pCompressor->match = (lzsa_match*)malloc(BLOCK_SIZE * NMATCHES_PER_INDEX_V2 * sizeof(lzsa_match));
else
pCompressor->match = (lzsa_match*)malloc(BLOCK_SIZE * NMATCHES_PER_INDEX_V1 * sizeof(lzsa_match));
if (pCompressor->match) {
if (pCompressor->format_version == 2) {
pCompressor->rep_slot_handled_mask = (unsigned char*)malloc(NARRIVALS_PER_POSITION_V2_BIG * ((LCP_MAX + 1) / 8) * sizeof(unsigned char));
if (pCompressor->rep_slot_handled_mask) {
pCompressor->rep_len_handled_mask = (unsigned char*)malloc(((LCP_MAX + 1) / 8) * sizeof(unsigned char));
if (pCompressor->rep_len_handled_mask) {
pCompressor->first_offset_for_byte = (int*)malloc(65536 * sizeof(int));
if (pCompressor->first_offset_for_byte) {
pCompressor->next_offset_for_pos = (int*)malloc(BLOCK_SIZE * sizeof(int));
if (pCompressor->next_offset_for_pos) {
pCompressor->offset_cache = (int*)malloc(2048 * sizeof(int));
if (pCompressor->offset_cache) {
return 0;
}
}
}
}
}
else {
return 0;
}
}
else {
return 0;
}
}
}
@ -146,6 +146,11 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
divsufsort_destroy(&pCompressor->divsufsort_context);
if (pCompressor->offset_cache) {
free(pCompressor->offset_cache);
pCompressor->offset_cache = NULL;
}
if (pCompressor->next_offset_for_pos) {
free(pCompressor->next_offset_for_pos);
pCompressor->next_offset_for_pos = NULL;
@ -171,11 +176,6 @@ void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
pCompressor->match = NULL;
}
if (pCompressor->improved_match) {
free(pCompressor->improved_match);
pCompressor->improved_match = NULL;
}
if (pCompressor->arrival) {
free(pCompressor->arrival);
pCompressor->arrival = NULL;

View File

@ -52,10 +52,12 @@ extern "C" {
#define NARRIVALS_PER_POSITION_V1 8
#define NARRIVALS_PER_POSITION_V2_SMALL 9
#define NARRIVALS_PER_POSITION_V2_BIG 32
#define ARRIVALS_PER_POSITION_SHIFT 5
#define NARRIVALS_PER_POSITION_V2_MAX 64
#define ARRIVALS_PER_POSITION_SHIFT_V1 3
#define ARRIVALS_PER_POSITION_SHIFT_V2 6
#define NMATCHES_PER_INDEX_V1 8
#define MATCHES_PER_INDEX_SHIFT_V1 3
#define NMATCHES_PER_INDEX_V1 16
#define MATCHES_PER_INDEX_SHIFT_V1 4
#define NMATCHES_PER_INDEX_V2 64
#define MATCHES_PER_INDEX_SHIFT_V2 6
@ -72,16 +74,16 @@ typedef struct _lzsa_match {
} lzsa_match;
/** Forward arrival slot */
typedef struct {
typedef struct _lzsa_arrival {
int cost;
unsigned short rep_offset;
short from_slot;
int from_pos;
unsigned short from_pos;
unsigned short rep_len;
unsigned short match_len;
unsigned short num_literals;
int rep_pos;
int num_literals;
int score;
} lzsa_arrival;
@ -122,12 +124,12 @@ typedef struct _lzsa_compressor {
unsigned int *open_intervals;
lzsa_match *match;
lzsa_match *best_match;
lzsa_match *improved_match;
lzsa_arrival *arrival;
char *rep_slot_handled_mask;
char *rep_len_handled_mask;
unsigned char *rep_slot_handled_mask;
unsigned char *rep_len_handled_mask;
int *first_offset_for_byte;
int *next_offset_for_pos;
int *offset_cache;
int min_match_size;
int format_version;
int flags;
@ -142,6 +144,7 @@ typedef struct _lzsa_compressor {
* @param pCompressor compression context to initialize
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
* @param nFormatVersion version of format to use (1-2)
* @param nFlags compression flags
*
* @return 0 for success, non-zero for failure

View File

@ -33,16 +33,13 @@
#ifndef _SHRINK_STREAMING_H
#define _SHRINK_STREAMING_H
#include "shrink_context.h"
#include "stream.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declaration */
typedef enum _lzsa_status_t lzsa_status_t;
typedef struct _lzsa_stats lzsa_stats;
/*-------------- File API -------------- */
/**

View File

@ -34,13 +34,17 @@
#include <stdlib.h>
#include <string.h>
#include "stream.h"
#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif
/**
* Close file stream
*
* @param stream stream
*/
static void lzsa_filestream_close(lzsa_stream_t *stream) {
static void lzsa_filestream_close(struct _lzsa_stream_t *stream) {
if (stream->obj) {
fclose((FILE*)stream->obj);
stream->obj = NULL;
@ -60,7 +64,7 @@ static void lzsa_filestream_close(lzsa_stream_t *stream) {
*
* @return number of bytes read
*/
static size_t lzsa_filestream_read(lzsa_stream_t *stream, void *ptr, size_t size) {
static size_t lzsa_filestream_read(struct _lzsa_stream_t *stream, void *ptr, size_t size) {
return fread(ptr, 1, size, (FILE*)stream->obj);
}
@ -73,7 +77,7 @@ static size_t lzsa_filestream_read(lzsa_stream_t *stream, void *ptr, size_t size
*
* @return number of bytes written
*/
static size_t lzsa_filestream_write(lzsa_stream_t *stream, void *ptr, size_t size) {
static size_t lzsa_filestream_write(struct _lzsa_stream_t *stream, void *ptr, size_t size) {
return fwrite(ptr, 1, size, (FILE*)stream->obj);
}
@ -84,7 +88,7 @@ static size_t lzsa_filestream_write(lzsa_stream_t *stream, void *ptr, size_t siz
*
* @return nonzero if the end of the data has been reached, 0 if there is more data
*/
static int lzsa_filestream_eof(lzsa_stream_t *stream) {
static int lzsa_filestream_eof(struct _lzsa_stream_t *stream) {
return feof((FILE*)stream->obj);
}
@ -98,7 +102,32 @@ static int lzsa_filestream_eof(lzsa_stream_t *stream) {
* @return 0 for success, nonzero for failure
*/
int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode) {
stream->obj = (void*)fopen(pszInFilename, pszMode);
const char* stdInOutFile = "-";
const char* stdInMode = "rb";
const char* stdOutMode = "wb";
#ifdef _WIN32
int result;
#endif
if (!strncmp(pszInFilename, stdInOutFile, 1)) {
if (!strncmp(pszMode, stdInMode, 2)) {
#ifdef _WIN32
result = _setmode(_fileno(stdin), _O_BINARY);
#endif
stream->obj = stdin;
} else if (!strncmp(pszMode, stdOutMode, 2)) {
#ifdef _WIN32
result = _setmode(_fileno(stdout), _O_BINARY);
#endif
stream->obj = stdout;
} else {
return -1;
}
} else {
stream->obj = (void*)fopen(pszInFilename, pszMode);
}
if (stream->obj) {
stream->read = lzsa_filestream_read;
stream->write = lzsa_filestream_write;

View File

@ -37,8 +37,23 @@
extern "C" {
#endif
/* Forward declaration */
typedef struct _lzsa_stream_t lzsa_stream_t;
/** High level status for compression and decompression */
typedef enum _lzsa_status_t {
LZSA_OK = 0, /**< Success */
LZSA_ERROR_SRC, /**< Error reading input */
LZSA_ERROR_DST, /**< Error writing output */
LZSA_ERROR_DICTIONARY, /**< Error reading dictionary */
LZSA_ERROR_MEMORY, /**< Out of memory */
/* Compression-specific status codes */
LZSA_ERROR_COMPRESSION, /**< Internal compression error */
LZSA_ERROR_RAW_TOOLARGE, /**< Input is too large to be compressed to a raw block */
LZSA_ERROR_RAW_UNCOMPRESSED, /**< Input is incompressible and raw blocks don't support uncompressed data */
/* Decompression-specific status codes */
LZSA_ERROR_FORMAT, /**< Invalid input format or magic number when decompressing */
LZSA_ERROR_DECOMPRESSION /**< Internal decompression error */
} lzsa_status_t;
/* I/O stream */
typedef struct _lzsa_stream_t {
@ -54,7 +69,7 @@ typedef struct _lzsa_stream_t {
*
* @return number of bytes read
*/
size_t(*read)(lzsa_stream_t *stream, void *ptr, size_t size);
size_t(*read)(struct _lzsa_stream_t *stream, void *ptr, size_t size);
/**
* Write to stream
@ -65,7 +80,7 @@ typedef struct _lzsa_stream_t {
*
* @return number of bytes written
*/
size_t(*write)(lzsa_stream_t *stream, void *ptr, size_t size);
size_t(*write)(struct _lzsa_stream_t *stream, void *ptr, size_t size);
/**
@ -75,14 +90,14 @@ typedef struct _lzsa_stream_t {
*
* @return nonzero if the end of the data has been reached, 0 if there is more data
*/
int(*eof)(lzsa_stream_t *stream);
int(*eof)(struct _lzsa_stream_t *stream);
/**
* Close stream
*
* @param stream stream
*/
void(*close)(lzsa_stream_t *stream);
void(*close)(struct _lzsa_stream_t *stream);
} lzsa_stream_t;
/**