359 Commits
0.4.1 ... 1.3.4

Author SHA1 Message Date
01228f3eeb Bump version 2020-08-18 11:57:25 +02:00
e3fd315541 Faster LZSA1 compression 2020-08-18 11:51:24 +02:00
5a0da16874 Increase LZSA2 ratio for some input files 2020-08-18 09:13:54 +02:00
028007b57c Bump version 2020-08-02 09:40:31 +02:00
4682b2e917 Small simplification 2020-07-29 15:23:22 +02:00
060f5d3350 Simplify code, compress LZSA2 another 15% faster 2020-07-29 13:01:24 +02:00
33eec56b9b Bump version 2020-07-27 15:36:01 +02:00
90fa770458 Compress another 8% faster 2020-07-27 13:25:16 +02:00
b2971da2b4 Nicer code 2020-07-26 16:38:22 +02:00
3fb9dc54b1 Compress LZSA2 another 3% faster 2020-07-26 10:07:03 +02:00
00d1d95625 Small improvement 2020-07-24 19:18:46 +02:00
e4f013f2db Compress LZSA2 17% faster 2020-07-24 17:14:01 +02:00
703ff19a3a Bump version 2020-07-14 23:50:44 +02:00
fc5081fb1a Rename confusing definitions 2020-07-14 22:36:38 +02:00
61698b5036 Another LZSA2 compression speedup 2020-07-14 17:01:07 +02:00
cf49af5cda Faster LZSA2 compression 2020-07-14 12:36:56 +02:00
c39158eea8 Compress LZSA2 faster, clean code up 2020-07-13 19:34:07 +02:00
4864f3c184 Compress LZSA1 a little faster 2020-07-10 17:45:13 +02:00
8ed768aafc Nicer and faster code 2020-07-10 08:55:45 +02:00
9c7495f458 Compress LZSA2 ~12% faster 2020-07-06 12:47:56 +02:00
afbb1de16c Merge pull request #48 from dougmasten/dev
More optimizations to the 6809 LZSA depackers
2020-06-28 15:46:17 +02:00
078edef880 Optimize match offset code in 6809 LZSA2 depacker 2020-06-27 04:17:05 -05:00
03692fca2c Update code byte counts for 6809 LZSA depackers 2020-06-27 02:02:33 -05:00
39e11422ec Delay clearing high part of literals count until we really have to in 6809 LZSA1 depacker 2020-06-27 02:02:33 -05:00
fde15d3fb0 Move instruction before branch to save one byte in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
fc8120f0da Optimize handling of 9 bits offset in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
c7b3ffc067 Delay clearing high part of literals count until we really have to in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
137c6201be One byte saving for setting reg A to $FF in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
e397428c1f Remove trailing whitespaces in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
b8cfbbbc7b Optimize handling of token's Z flag bit in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
28ca829924 delay clearing high part of literals count until we really have to in 6809 LZSA1 depacker 2020-06-27 02:02:33 -05:00
27562f4761 Restructure code to eliminate one BRA instruction from loop in 6809 LZSA1 depacker 2020-06-27 02:02:33 -05:00
0307d228a0 Merge pull request #3 from dougmasten/master
Merge pull request
2020-06-26 17:13:02 -05:00
e8b2ebb89f Merge pull request #2 from emmanuel-marty/master
Merge pull request
2020-06-26 17:08:44 -05:00
f72133f4cf Move instruction before branch to save one byte in 6809 LZSA2 depacker 2020-06-26 16:48:58 -05:00
56ba563794 One byte saving for setting reg A to $FF in 6809 LZSA2 depacker 2020-06-26 15:59:24 -05:00
c0f09db364 Delay clearing high part of literals count until we really have to in 6809 LZSA2 depacker 2020-06-26 15:56:28 -05:00
99db30a732 Optimize handling of 9 bits offset in 6809 LZSA2 depacker 2020-06-26 15:18:53 -05:00
061ca99838 Optimize handling of token's Z flag bit in 6809 LZSA2 depacker 2020-06-26 14:52:09 -05:00
7b96368469 Optimize match offset code in 6809 LZSA2 depacker 2020-06-26 14:42:36 -05:00
e9540b2e3d Remove unnecessary "ADDB #$18" as register B will always have this value from 6809 LZSA2 depacker 2020-06-24 00:49:09 -05:00
40212975c2 Bump version 2020-06-22 10:08:40 +02:00
07c3969432 Compress LZSA2 raw files one byte shorter 2020-06-22 00:13:14 +02:00
6a47ed7f41 Remove unnecessary "ADDB #$18" as B register will always have this value from 6809 LZSA depacker 2020-06-21 12:32:54 -05:00
06d63de9d7 Rearrange "CLRA" instruction for slight speed optimization in 6809 LZSA depacker 2020-06-21 12:16:07 -05:00
b5b8ca556a Rearrange match offset code to save 2 bytes in 6809 LZSA depacker 2020-06-21 01:09:15 -05:00
f724663ba8 Update README some more 2020-06-21 00:04:03 +02:00
798c07f6e0 Update README again 2020-06-20 23:59:01 +02:00
2f79779beb Update README 2020-06-20 23:57:22 +02:00
a9b5a7511c Merge pull request #47 from dougmasten/dom
Various optimizations to the 6809 LZSA depackers
2020-06-20 21:24:28 +02:00
7f0316b81c Update code byte counts for 6809 LZSA depackers 2020-06-20 12:45:51 -05:00
e8edc3242d Restructure code to eliminate one BRA instruction in unlzsa1.s 6809 LZSA depacker 2020-06-20 12:27:31 -05:00
1dd65731c4 Remove unnecessary "ADDB #$12" as B register will always have this value from 6809 LZSA depackers 2020-06-20 12:27:22 -05:00
f233d552ca Remove unnecessary "TSTB" instruction from 6809 LZSA depackers 2020-06-20 12:11:35 -05:00
8c99570b06 Merge pull request #1 from emmanuel-marty/master
Add backward depackers for 6809
2020-06-20 12:03:30 -05:00
ee969968c1 Add backward depackers for 6809 2020-06-20 14:41:25 +02:00
f920485899 -3 bytes for 6809 LZSA2 depacker, slightly faster 2020-06-19 20:08:09 +02:00
2cdeda4784 -2 bytes for 6809 LZSA1 depacker, slightly faster 2020-06-19 20:07:41 +02:00
f198431a71 -2 bytes for 6809 LZSA2 depacker, speed unchanged 2020-06-19 18:26:25 +02:00
a6a69ebe17 Mention 6809 decompressors 2020-06-19 12:55:21 +02:00
f80706ff7e -5 bytes and faster 6809 LZSA2 depacker 2020-06-19 12:50:10 +02:00
c3b111cea6 -2 bytes and faster 6809 LZSA1 depacker 2020-06-19 12:49:46 +02:00
e253d3e628 -1 byte for 6809 LZSA2 depacker 2020-06-19 08:57:01 +02:00
26bf944d9f Add 6809 depacker for LZSA2 2020-06-18 17:48:08 +02:00
b88d3465e3 Compress LZSA2 a tiny bit faster 2020-06-18 17:47:31 +02:00
1d0427ecae Make 6809 depacker for LZSA1 smaller/faster 2020-06-18 17:46:42 +02:00
3ce9a2b36e Add 6809 depacker for LZSA1 2020-06-18 13:26:25 +02:00
930fe453eb Bump version 2020-05-29 19:03:17 +02:00
193f9f467b Fix C99-only feature 2020-05-28 20:00:30 +02:00
278fcc0256 Faster LZSA2 compression 2020-05-28 18:38:19 +02:00
231f6580c5 Another small LZSA2 compression speedup 2020-05-28 15:10:38 +02:00
dc413164ad Compress a little faster 2020-05-24 15:19:59 +02:00
07104538b7 Fix #46 2020-05-06 19:36:39 +02:00
c173a5130f Fix issue #45 and some extra warnings 2020-04-26 10:24:15 +02:00
d6c43f507c Add reference to RomWBW that uses LZSA2 2020-04-21 23:39:05 +02:00
b0e2e7df75 Add reference to the Commander X16 2020-04-20 18:14:13 +02:00
b61ed6a229 Add reference to Gameboy depacker 2020-04-20 16:32:28 +02:00
7d59fe3325 Bump version 2020-04-11 15:38:43 +02:00
668204d953 Merge optimizations by Pavel Zagrebin
Manually merge PR #44
2020-04-04 13:29:25 +02:00
47e54ac110 Remove unused code 2020-03-24 20:18:22 +01:00
26a64de95e Compress LZSA2 faster 2020-03-24 12:04:25 +01:00
f27ac9ae25 Remove code that is now unnecessary 2020-03-14 15:12:02 +01:00
236df36f2b Another small compression speedup 2020-03-14 13:13:01 +01:00
78276c18da Update README.md 2020-03-12 08:19:17 +01:00
0852f337bd Add link to Motorola 68K decompressors 2020-03-06 18:54:09 +01:00
3efb9928a1 Merge pull request #42 from odzhan/master
compact decompressors for x86
2020-01-13 23:38:08 +01:00
5fddaca0f8 compact decompressors for x86 2020-01-13 18:14:11 +00:00
693618f100 More asm fixes 2020-01-08 13:11:43 +01:00
aa122d1e05 Fix assembly when not using LZSA_SHORT_CP 2020-01-08 10:40:26 +01:00
315cda7b4f Merge pull request #40 from peterferrie/master
cut one byte
2020-01-08 10:26:29 +01:00
9f020b3605 fix typos in original code 2020-01-07 17:02:43 -08:00
22e2f80ae7 cut one byte 2020-01-07 16:57:49 -08:00
925e435e53 Merge pull request #39 from peterferrie/master
fasterer v2
2020-01-07 19:49:34 +01:00
45b91ddaa2 fasterer v2 2020-01-06 22:24:07 -08:00
fb7e03030f Merge pull request #38 from specke/master
-2 bytes
2020-01-03 10:37:28 +01:00
29e8960e6f Add faster LZSA1 6502 depacker by jbrandwood 2020-01-03 10:31:54 +01:00
410544f4e6 Fast 6502 LZSA2 depacker: smaller size, same speed 2020-01-03 10:01:23 +01:00
96b9933bd3 remove temporary label 2020-01-02 14:32:30 +00:00
9cd9fa5939 -2 bytes
(same speed)
2020-01-02 14:30:00 +00:00
3b37a0bb70 Merge pull request #5 from emmanuel-marty/master
Catch up with the changes in main
2020-01-02 13:51:29 +00:00
8721c11041 Add faster LZSA2 depacker by jbrandwood 2019-12-24 12:02:34 +01:00
a46796b6a9 Fix assembling of 6502 fast v1 depacker 2019-12-24 10:16:01 +01:00
64e641411e Small compression speedup 2019-12-21 18:59:56 +01:00
7068c258bd Bump version 2019-12-15 23:37:20 +01:00
a0ac24d105 Tiny LZSA1 ratio increase; small improvements 2019-12-15 23:36:51 +01:00
e9ca5032bc Ratio increase 2019-12-15 18:04:16 +01:00
fc5f540a68 Bump version 2019-12-09 09:55:50 +01:00
4c566286f5 Increase ratio 2019-12-09 09:54:56 +01:00
65a262ec95 Bump version 2019-11-27 15:26:34 +01:00
63da0eb49d Small speedup 2019-11-27 09:47:32 +01:00
78ad147799 Don't systematically encode last byte as literal 2019-11-26 20:48:13 +01:00
88f563d84c Secure last token decompression for non-raw blocks 2019-11-26 17:35:11 +01:00
d37589cfdb Expand forward repmatch candidates 2019-11-26 13:33:08 +01:00
0b5e915d83 Split non-rep from repmatch candidates 2019-11-26 11:58:34 +01:00
a38e8b126c Small LZSA2 compression speedup 2019-11-20 15:40:11 +01:00
5f4cf4dfc7 Fix for LZSA1 as well 2019-11-19 19:53:46 +01:00
8e4e7c06c7 Fix calculation for promoting literal+match seqs 2019-11-19 19:48:39 +01:00
b1738b4003 Promote some literal+match sequences to a match 2019-11-18 12:10:23 +01:00
e328f63feb Bump version 2019-11-13 00:57:31 +01:00
e0c42afac9 Increase LZSA2 ratio 2019-11-13 00:57:09 +01:00
ce7fc33646 Reduce memory use 2019-11-12 00:30:24 +01:00
53b2013b73 Small improvement to merging large matches 2019-11-11 18:41:08 +01:00
df9690a949 Merge pull request #36 from peterferrie/master
fast v1
2019-11-09 17:55:19 +01:00
302234a91b fast v2 2019-10-31 23:01:00 -07:00
9fc9a49d67 fast v1 2019-10-31 22:20:02 -07:00
7371486513 Bump version 2019-10-29 12:10:13 +01:00
f249597dfd Increase LZSA2 ratio 2019-10-29 12:09:14 +01:00
78f588a833 Clarify defines 2019-10-29 10:45:57 +01:00
c790fb8ebe Add link to Gabba ZX Spectrum demo, that uses LZSA 2019-10-28 09:21:57 +01:00
3c4f535e0b Increase LZSA2 ratio by ~0.02% 2019-10-27 14:55:39 +01:00
8551c3ff8a Merge pull request #35 from MobyGamer/decompressor/8086_speed_jumptable
Rewrite 8088 jumptable decompressor for maximum speed
2019-10-27 10:28:48 +01:00
30192238ea Rewrite 8088 jumptable decompressor for maximum speed
This is a rewrite of LZSA1JMP.ASM to use a 256-element jumptable, which
allows the code to handle all of the hot paths (common cases) without
any branching.  This not only reduces branches (which are very costly on
x86) to a bare minimum, but also grants us foreknowledge in a decode
path of what steps can be skipped.

The new code is 12.7% faster than the old code, and assembles to less
than 3K of object code and data.
2019-10-26 23:34:24 -05:00
53fcd3b1a8 Generalize merging very large matches 2019-10-24 13:05:32 +02:00
f4cf97f176 Merge pull request #34 from specke/master
Added option for unrolled copying of long matches
2019-10-22 21:52:48 +02:00
d5d788946e Added an option for unrolling long match copying
Usually useless and costing +57 bytes, this option can bring dramatic performance improvements on very compressible data dominated by long matches
2019-10-22 20:11:46 +01:00
e1e1276c96 Merge pull request #4 from emmanuel-marty/master
Re-sync with the main
2019-10-22 20:09:00 +01:00
16ac8c75af Add link to PDP-11 depackers by Ivan Gorodetsky 2019-10-22 17:13:05 +02:00
05d77095ca Bump version 2019-10-22 12:39:27 +02:00
b84fe7c332 Further increase LZSA2 ratio by ~0.1% on average 2019-10-22 12:37:46 +02:00
7dd039a152 Delete shrink_context.h 2019-10-22 12:37:16 +02:00
9f6ca2c25f Delete shrink_block_v2.c 2019-10-22 12:37:04 +02:00
dbaa3fa921 Further increase LZSA2 ratio by ~0.1% on average 2019-10-22 12:36:41 +02:00
2926ad8436 Remove unused #includes 2019-10-21 12:29:38 +02:00
d9156d3d2b Reduce LZSA1 token count by 2.5% on average 2019-10-19 13:10:41 +02:00
6adf92fc88 Merge pull request #33 from specke/master
-1 byte
2019-10-11 10:18:05 +02:00
96df02c532 Remove unused code 2019-10-11 09:20:36 +02:00
89f1664ae6 Remove unused code 2019-10-11 09:14:19 +02:00
c363ecf527 Remove unused code 2019-10-11 09:11:49 +02:00
5141ed7c59 Remove unused code 2019-10-11 09:11:41 +02:00
c77c666568 Remove unused code 2019-10-11 09:10:07 +02:00
115a81cb71 Remove unused code 2019-10-11 09:09:42 +02:00
4436f216ce Bump version 2019-10-11 09:06:50 +02:00
baa53f6889 Newly compressed LZSA2 files depack 0.7% faster 2019-10-11 09:05:58 +02:00
495a12216f -1 byte
Very slightly faster too
2019-10-11 00:23:43 +01:00
b5117c3dfe Fixes for -stats 2019-10-11 00:25:46 +02:00
f5ef6bf868 Merge pull request #32 from specke/master
Slightly faster unlzsa2_fast.asm for Z80
2019-10-11 00:22:12 +02:00
566e3a94e8 +0.2% speed
also, added an option to unroll LDIR for longer matches (which adds 38 bytes, but can be significantly faster for files with many long matches)
2019-10-10 22:50:23 +01:00
e3d7ec9c40 Merge pull request #3 from emmanuel-marty/master
Sync with E.Marty's branch
2019-10-10 22:46:53 +01:00
d209b73a30 Fix small bug 2019-10-10 14:42:08 +02:00
c1b18fb9fd Implement -stats 2019-10-09 18:20:22 +02:00
6ce846ff24 Speed up LZSA2 compression 2019-10-09 16:07:29 +02:00
b09dadb1c1 Small LZSA2 token count reduction 2019-10-09 13:16:29 +02:00
03f841d04f Speed up LZSA2 compression 2019-10-08 20:26:21 +02:00
44df8f3d2d Add early-out, speed LZSA2 compression up further 2019-10-08 16:23:33 +02:00
bfb383befd Speed up LZSA2 compression 2019-10-08 09:39:18 +02:00
39e2a90f81 Prevent small matchfinder inefficiency 2019-10-04 11:54:54 +02:00
33327201f7 Fix small LZSA2 token reduction inefficiency 2019-10-03 16:58:34 +02:00
29c6f3b2a3 Remove erroneous else statement 2019-09-26 19:13:09 +02:00
6a62f7d795 Update Z80 depackers changes history 2019-09-26 11:42:52 +02:00
681f78d1e8 Rename 2019-09-26 07:48:59 +02:00
8015ab8650 Rename 2019-09-26 07:48:44 +02:00
2f15298343 Rename 2019-09-26 07:48:33 +02:00
648a308d87 Rename 2019-09-26 07:48:19 +02:00
587a92f4ab Rename Z80 depackers, add version history to LZSA1 2019-09-26 07:47:43 +02:00
7d9135c548 Update Z80 decompressors 2019-09-25 08:09:18 +02:00
ac9de3795c Update Pareto frontier graph from spke 2019-09-25 07:56:47 +02:00
b4b4d39eff Fix newly added external link 2019-09-24 18:03:20 +02:00
cb46987628 Update stats and links 2019-09-24 18:02:24 +02:00
e55c80a475 Clean up use of MODESWITCH_PENALTY; bump version 2019-09-24 14:43:17 +02:00
de0ff5d3b0 Reduce memory used for compression 2019-09-24 00:21:17 +02:00
249b8a4c46 Increase LZSA2 ratio and use forward parser for -m 2019-09-23 20:24:50 +02:00
74040890fc Speed up LZSA2 compression (same binary output) 2019-09-23 16:58:03 +02:00
81e15d10f0 Add extra safety checks to LZSA2 token reducer 2019-09-22 20:41:09 +02:00
1869d85c1f Simplify LZSA1 token reducer (same binary output) 2019-09-22 20:34:08 +02:00
1a4f662360 Bump version 2019-09-20 12:26:16 +02:00
c12e20b7fb Improve LZSA2 compression ratio 2019-09-20 12:24:27 +02:00
51644ad2f9 Speed LZSA2 compression up further; fix typo 2019-09-19 17:18:37 +02:00
1495b27f69 Speed up LZSA1 compression with forward arrivals 2019-09-19 12:57:39 +02:00
c052a188f2 Reduce LZSA2 forward arrivals memory use 2019-09-19 11:46:03 +02:00
e4076e4090 Speed LZSA2 compression up; tiny ratio increase 2019-09-19 00:11:26 +02:00
8b7d0ab04d Increase LZSA2 ratio. Decrease token count 2019-09-17 08:10:52 +02:00
b1da9c1aee Add extra bound checks in C decompressors 2019-09-12 16:19:14 +02:00
b92a003338 Merge pull request #29 from francois-berder/master
Various improvements -- thank you!
2019-08-28 13:50:00 +02:00
4f2d7da136 Fix main return value if compressing
Signed-off-by: Francois Berder <18538310+francois-berder@users.noreply.github.com>
2019-08-28 09:41:54 +01:00
a318ac2f83 Fix memory leak in comparestream_open
Signed-off-by: Francois Berder <18538310+francois-berder@users.noreply.github.com>
2019-08-28 09:40:49 +01:00
da67938978 Set dictionnary to NULL in lzsa_dictionary_free
Signed-off-by: Francois Berder <18538310+francois-berder@users.noreply.github.com>
2019-08-28 09:39:07 +01:00
2d213bcff1 Bump version number 2019-08-27 13:18:23 +02:00
9de7e930e9 Faster LZSA1 z80 decompression 2019-08-27 13:16:20 +02:00
ef259e6867 Implement forward arrivals optimal parsers 2019-08-27 00:51:34 +02:00
90b4da64d1 Merge pull request #27 from uniabis/twobytesshorter
2bytes shorter
2019-08-26 23:49:27 +02:00
a807344343 2bytes shorter 2019-08-22 12:55:55 +09:00
27d0fe4e83 Merge pull request #26 from arm-in/patch-1
Update README.md
2019-08-06 20:54:24 +02:00
f8e445a98a Update README.md
Now 67 bytes with commit be30cae636
2019-08-06 20:15:59 +02:00
0e567bde47 Merge pull request #25 from specke/master
-1 byte
2019-08-06 20:03:52 +02:00
be30cae636 -1 byte
slightly slower, but this is the size-optimized branch
2019-08-06 12:36:27 +01:00
1b368e71ad Fix comments, header single inclusion defines 2019-08-04 16:42:30 +02:00
d98220ff42 Merge pull request #24 from specke/master
New Pareto frontier graph
2019-08-01 16:51:29 +02:00
d412433df4 New Pareto frontier graph
Shows improved performance of the new Z80 decompressors, esp. due to the improvements by uniabis
2019-08-01 15:26:53 +01:00
77c1492310 Merge pull request #23 from specke/master
New faster and shorter decompressors
2019-08-01 16:19:24 +02:00
44bff39de3 New faster and shorter decompressors
This update is mostly about better integration of improvements by uniabis, with spke contributing several smaller size optimizations.
2019-08-01 15:07:14 +01:00
3c690b04f5 Merge pull request #22 from specke/master
incorporated improvements by uniabis
2019-08-01 01:34:46 +02:00
e7bb1faece Merge branch 'master' into master 2019-07-31 23:24:30 +01:00
e48d2dafde Merge pull request #21 from uniabis/hd64180 - up to 3% speedup on Z80!
hd64180 support on z80 unpacker
2019-07-31 23:57:23 +02:00
51ef92cdab incorporated improvements by uniabis
also, slightly faster decompression for fast packer in backwards mode
2019-07-31 20:42:47 +01:00
8d0528fddc hd64180 support
a bit faster, a bit smaller
2019-07-31 01:39:27 +09:00
b3aae36ecc Bump version 2019-07-28 00:25:51 +02:00
8787b1c3d8 Merge pull request #20 from specke/master (should be already fixed now..)
fix a bug in the backward version of unlzsa2_fast_v1.asm
2019-07-27 15:50:38 +02:00
0a04796b19 Fix for z80 LZSA2 fast backward depacker 2019-07-27 15:39:44 +02:00
ac3bf78273 fix a bug in the backward version of unlzsa2_fast_v1.asm
an INC HL slipped through
2019-07-27 14:14:54 +01:00
82edcb8bb5 Fix literal runs that are multiple of 256 bytes 2019-07-27 01:35:46 +02:00
b613d01565 Test incompressible data with raw blocks 2019-07-26 13:30:41 +02:00
ae4cc12aed Use ACME syntax 2019-07-26 12:31:26 +02:00
316dfdcdce Fix comments, remove unused vars 2019-07-26 01:12:17 +02:00
fd70be918c Merge pull request #19 from specke/master
Support for -b in Z80 decompressors
2019-07-24 20:09:48 +02:00
4835e4c26c Support backward decompression 2019-07-24 20:08:23 +02:00
cca79e3e59 Delete unlzsa_small_v1.asm 2019-07-24 17:31:22 +01:00
607b26d337 Delete unlzsa_fast_v1.asm 2019-07-24 17:31:14 +01:00
fd61f403ad LZSA1 decompressors with added support for -b. 2019-07-24 17:30:37 +01:00
fcfba056d2 Add files via upload
LZSA2 decompressors with support for -b option.
2019-07-24 17:28:39 +01:00
0c4dbf2b72 Add files via upload
Show decompression safety distance for raw blocks
2019-07-24 15:43:44 +02:00
9f313d6ee6 Handle EOD in C depacker; fix #18; fix typos in usage 2019-07-23 23:28:52 +02:00
04cc67cf42 Add refence to The Hollow 2019-07-16 20:41:36 +02:00
081a29a3db Fix copying multiples of 256 bytes 2019-07-14 16:14:55 +02:00
2d65f2a7c8 Bump version 2019-07-14 10:15:10 +02:00
710d7e05d6 Fix comments 2019-07-14 10:13:16 +02:00
0b540431fc Fix comments 2019-07-14 10:12:32 +02:00
981b1d5925 NASM versions of Jim Leonard's speed-optimized depackers 2019-07-14 10:11:16 +02:00
19e8bc0468 Merge pull request #17 from MobyGamer/decompressor/8086_speed
Time-efficient LZSA2 decompressor
2019-07-14 09:28:40 +02:00
c38b582e73 Time-efficient LZSA2 decompressor
This commit provides a time-effecient LZSA2 decompressor for
the 8088 (and higher) CPU.  Decompression speed is roughly 50% faster
than ZX7 on the same hardware.
2019-07-13 22:35:28 -05:00
6445d9ff6f Merge pull request #16 from peterferrie/master - thanks!
smaller
2019-07-12 10:34:58 +02:00
6797fe6268 smaller 2019-07-11 17:20:38 -07:00
7195104c73 Merge pull request #15 from MobyGamer/decompressor/8086_speed
Submit 8086-optimized decompressor using jump tables
2019-07-12 00:59:41 +02:00
f123a6d9df Submit 8086-optimized decompressor using jump tables
Because the 8086's BIU is more efficient and can read a word in
a single operation, we can skew LZSA1 decompression faster on 8086
by using a jump table to eliminate 2 comparions.
2019-07-11 17:22:29 -05:00
4d7d90b893 Merge pull request #14 from MobyGamer/decompressor/8086_speed
Additional minor speedups
2019-07-11 09:01:14 +02:00
455e6ee913 Merge pull request #13 from peterferrie/master
smaller
2019-07-11 08:57:10 +02:00
638c33b432 Additional minor speedups 2019-07-11 00:58:46 -05:00
3bfe60de44 use BYTE 2019-07-10 18:03:42 -07:00
fb53706361 smaller 2019-07-10 18:02:27 -07:00
d3d62c3bf0 Use BYTE 2019-07-11 01:52:13 +02:00
712c3c2f9f Merge pull request #12 from peterferrie/master
smaller
2019-07-11 01:50:57 +02:00
7861ef1552 smaller 2019-07-10 16:04:42 -07:00
36e7316f30 Merge pull request #11 from peterferrie/master
smaller
2019-07-10 22:35:01 +02:00
8e26fa9cac smaller 2019-07-10 10:57:41 -07:00
a1841e5e5d smaller 2019-07-10 10:48:15 -07:00
f52ba3c40c Merge pull request #10 from peterferrie/master - 17 bytes smaller
smaller
2019-07-10 11:36:00 +02:00
68339ad961 smaller 2019-07-09 18:44:37 -07:00
15c9059adf smaller 2019-07-09 18:30:29 -07:00
e85cb8a5bb smaller 2019-07-09 16:49:01 -07:00
467cd1970f Merge pull request #9 from MobyGamer/decompressor/8086_speed
Additional 1% speedup from deferring work
2019-07-09 20:42:17 +02:00
9a180a59f9 Additional 1% speedup from to introspec suggestions 2019-07-09 13:05:10 -05:00
12086b974a Fix comments, remove unused vars 2019-07-08 17:57:36 +02:00
f505e8d4e7 Update credits 2019-07-08 09:47:37 +02:00
b7b3ca1907 Merge pull request #8 from MobyGamer/decompressor/8086_speed - thank you!
8088-speed-optimized LZSA1 decompression
2019-07-08 09:17:51 +02:00
0c5bc1d218 Merge pull request #7 from peterferrie/master - thanks again!
a little more
2019-07-08 09:17:30 +02:00
b0c9d61aab 8088-speed-optimized LZSA1 decompression
This commit contributes LZSA1 raw block format decompression code,
optimized for speed for the 8088 and higher processors.  With
appropriate compression options (-f1 -m5), decompression speed is
comparable to LZ4.
2019-07-07 20:25:50 -05:00
111403c052 a little more 2019-07-07 12:05:02 -07:00
fe0dcf107f Use db rather than .byte 2019-07-04 18:09:29 +02:00
cdb27add47 Merge pull request #6 from peterferrie/master (thanks!)
a bit faster, a bit smaller
2019-07-04 18:08:49 +02:00
32b98f3085 a bit faster, a bit smaller 2019-07-03 19:38:27 -07:00
da4f6f7882 Restore CLC 2019-07-03 23:39:05 +02:00
740df3d629 Merge pull request #5 from peterferrie/master
slightly faster
2019-07-03 23:37:48 +02:00
6f46c60259 slightly faster 2019-07-03 12:11:50 -07:00
3c50727421 Update VS2017 project 2019-07-01 09:26:55 +02:00
59eb891394 Improve LZSA2 compression ratio further and allow incompressible raw blocks 2019-07-01 09:25:19 +02:00
8d8f50a509 Don't use 80186 opcodes. Issue #3 2019-06-27 18:32:49 +02:00
3c8c4283c7 Use real cost of rep offset when considering a reduction; fix small bug in repmatch handling when doing reductions. 2019-06-26 13:55:00 +02:00
2a265f9cf9 Remove erroneous comment 2019-06-25 11:13:13 +02:00
7867618f87 Fix matchfinder limitation 2019-06-25 11:09:19 +02:00
5a531cd4ce Remove 2019-06-21 17:40:33 +02:00
d1245c7d6a Fix small typo in z80 depackers 2019-06-21 17:39:50 +02:00
63c261ce2d Small improvement of the LZSA2 compression ratio 2019-06-21 17:28:17 +02:00
1c3911a659 Add Pareto frontier graph by spke 2019-06-11 10:43:41 +02:00
71f262081b Use -m3 as an upper bound for LZSA2 2019-06-11 10:38:47 +02:00
028163c42a Fix typo in final LZSA2 block format description 2019-06-10 14:10:57 +02:00
43ff88477f Update tool version number 2019-06-10 14:09:23 +02:00
a8e3f7bbcc Update to final LZSA2 block format 2019-06-10 14:08:53 +02:00
931f5fff9b Add Z80 decompressors for LZSA2 2019-06-10 13:24:15 +02:00
435e157239 Save 3 more bytes 2019-06-08 23:59:34 +02:00
aab82fa022 Save 9 bytes in 6502 LZSA1 depacker 2019-06-08 23:52:18 +02:00
d564c1ecb8 Fixes 2019-06-08 19:03:33 +02:00
2e48e926a1 Fix self-tests, in-memory benchmarks in raw mode 2019-06-08 18:05:00 +02:00
659f6c14a9 Small improvement over 6502 LZSA2 depacker 2019-06-08 16:18:36 +02:00
affa08c213 Update Xcode project 2019-06-08 13:37:48 +02:00
318dffe87c Update VS2017 project 2019-06-08 13:37:16 +02:00
6e1201e221 Moved 2019-06-08 13:36:40 +02:00
3008d47012 Moved 2019-06-08 13:36:20 +02:00
79ed7bf91e Further update LZSA2 format; avoid name conflicts 2019-06-08 13:35:03 +02:00
272f2e7a29 Update LZSA2 6502 and 8088 depackers 2019-06-07 23:22:34 +02:00
62003a82c2 Add Xcode project as well 2019-06-07 23:21:35 +02:00
fa2439a383 Update VS2017 project after code split 2019-06-07 23:21:04 +02:00
a7fbc93964 Update Makefile after code split 2019-06-07 23:20:26 +02:00
ddd2ac4c56 Remove file after restructuring 2019-06-07 23:18:52 +02:00
8ef0396b50 Remove file after restructuring 2019-06-07 23:18:43 +02:00
ee6ab423fc Remove file after restructuring 2019-06-07 23:18:14 +02:00
59d2383b03 Remove file after restructuring 2019-06-07 23:18:05 +02:00
74e8d82231 Remove file after restructuring 2019-06-07 23:17:56 +02:00
026537f7f2 Remove file after restructuring 2019-06-07 23:17:47 +02:00
998714d714 Remove file after restructuring 2019-06-07 23:17:37 +02:00
124adf8ade Remove file after restructuring 2019-06-07 23:17:16 +02:00
59d7074da1 Remove file after restructuring 2019-06-07 23:17:07 +02:00
e924390c8e Remove file after restructuring 2019-06-07 23:16:47 +02:00
47315447fd Remove file after restructuring 2019-06-07 23:16:29 +02:00
b4e3c07d3a Split code, add automated tests, update LZSA2 2019-06-07 23:15:40 +02:00
45cb124c4d Simplify and speed up in-tool decompressors; add in-memory benchmark 2019-05-17 08:57:01 +02:00
055a80abfd Fix another typo 2019-05-14 18:45:46 +02:00
d5d879adea Fix typo 2019-05-14 18:44:29 +02:00
3ae13b5410 Fix links to new documents 2019-05-14 18:42:22 +02:00
635e575992 Update format spec, stats 2019-05-14 18:38:40 +02:00
a708a02048 Fix compilation warning 2019-05-14 12:18:35 +02:00
277b5b1025 Move top-level streaming compression code to library 2019-05-13 22:22:53 +02:00
0f4ffa7166 Clean up check for emitting a single raw block 2019-05-12 23:21:50 +02:00
c96594680a Use full BLOCK_SIZE for dictionary, and use last BLOCK_SIZE bytes rather than first 2019-05-12 14:44:28 +02:00
3caa72c722 Clarify optimizer 2019-05-11 11:42:18 +02:00
d70830b525 Simplify nibble handling in LZSA2 8088 depacker 2019-05-11 11:42:00 +02:00
8b7b4a2b4f Check in LZSA2 implementation (ratio competitive with ZX7, faster decompression) 2019-05-09 16:51:29 +02:00
49b0739050 Isolate frame implementation details 2019-05-03 19:17:14 +02:00
31ededcdf7 Rejoin large patterns 2019-05-03 13:45:37 +02:00
4011d7d3c8 Check in VS 2017 project 2019-05-02 23:51:42 +02:00
bab5225e4c Implement dictionary support 2019-05-02 18:38:57 +02:00
ef6b43a296 When using -m, decompress ~7% faster by trading ~0.5% of compression ratio 2019-05-02 11:23:57 +02:00
97dd3ffc1f Update fast Z80 decompressor 2019-04-25 17:19:58 +02:00
bd6e31b03c Update stats 2019-04-25 13:03:38 +02:00
9e75a55dd3 Reduce token count by 1% without changing the ratio 2019-04-25 13:01:56 +02:00
b7967c3aa1 Update README 2019-04-24 10:02:35 +02:00
2b9780bd65 Finalize lzsa1 compressed format, speed up and simplify decompression 2019-04-24 09:47:40 +02:00
593110ae5d Fix parsing of -m option 2019-04-22 14:16:25 +02:00
331d6f9911 Add --prefer-ratio, --prefer-speed, -m options 2019-04-21 09:41:12 +02:00
f837ed096e Add size-optimized Z80 decompressor, contributed by spke 2019-04-21 08:15:43 +02:00
6c4a86c9fe Make compression 5% faster 2019-04-20 16:24:54 +02:00
fb79d319cb Speed up portable decompressor a bit 2019-04-20 10:27:24 +02:00
3e5639afaf Reduce token count further 2019-04-20 10:26:45 +02:00
88054841cf Add strict block checks when decompressing, comparing 2019-04-20 10:26:13 +02:00
b3268b74eb Update stats 2019-04-13 12:38:18 +02:00
aa5dbe283d Small optimization to the cost model 2019-04-13 12:33:39 +02:00
4913fe5d48 Fix reporting of compressed size for raw mode 2019-04-13 12:33:25 +02:00
cf51f1775a Fix small literals cost estimate issue in optimizer, gain a small amount of compression and reduction of token count 2019-04-11 23:47:57 +02:00
4c5d44fff4 Improve hotpath of 6502 depacker further 2019-04-10 19:41:41 +02:00
2451d5802e Fix typo in lzsa tool 2019-04-10 17:38:22 +02:00
8c3fc6d652 Simplify short offset handling in 6502 depacker 2019-04-10 17:37:36 +02:00
c220694531 Add 6502 decompressor 2019-04-10 17:30:24 +02:00
02592cfe3b Fix typo in 8088 decompressor comments 2019-04-10 17:30:13 +02:00
953f2934a1 Merge branch 'master' of https://github.com/emmanuel-marty/lzsa 2019-04-08 09:44:19 +02:00
9649073131 Speed up compression 2019-04-08 09:44:07 +02:00
061d24e49e Update stats with results for compressor v0.5.0 2019-04-08 08:52:59 +02:00
837b6895cf Further reduce the number of tokens for even better decompression speed; increase compression slightly 2019-04-07 15:10:53 +02:00
9c35d5115b When verbose, display uncompressed bytes/token ratio 2019-04-07 15:10:17 +02:00
be54a4f889 Check in optimized-for-speed Z80 decompressor contributed by spke 2019-04-07 00:01:42 +02:00
6aa2dae4b3 Add context to libdivsufsort, don't allocate memory during compression 2019-04-07 00:01:22 +02:00
e24320b23b Save 1 byte in 8088 decompressor 2019-04-06 00:21:15 +02:00
75 changed files with 13710 additions and 1932 deletions

BlockFormat_LZSA1.md (new file, 65 lines)

@@ -0,0 +1,65 @@
# Block data format (LZSA1)
Blocks encoded as LZSA1 are composed of consecutive commands. Each command follows this format:
* token: <O|LLL|MMMM>
* optional extra literal length
* literal values
* match offset low
* optional match offset high
* optional extra encoded match length
**token**
The token byte is broken down into three parts:
7 6 5 4 3 2 1 0
O L L L M M M M
* L: 3-bit literals length (0-6, or 7 if extended). If the number of literals for this command is 0 to 6, the length is encoded in the token and no extra bytes are required. Otherwise, a value of 7 is encoded and extra bytes follow as 'optional extra literal length'
* M: 4-bit encoded match length (0-14, or 15 if extended). Likewise, if the encoded match length for this command is 0 to 14, it is directly stored, otherwise 15 is stored and extra bytes follow as 'optional extra encoded match length'. Except for the last command in a block, a command always contains a match, so the encoded match length is the actual match length offset by the minimum, which is 3 bytes. For instance, an actual match length of 10 bytes to be copied is encoded as 7.
* O: set for a 2-bytes match offset, clear for a 1-byte match offset
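To make the bit layout concrete, here is a minimal C sketch of extracting the three token fields; the struct and function names are illustrative and not taken from the reference decompressors:

```c
#include <stdint.h>

/* Illustrative split of an LZSA1 token byte <O|LLL|MMMM>. */
typedef struct {
    int long_offset;        /* O bit: 1 = 2-byte match offset, 0 = 1-byte match offset */
    int literals_length;    /* L bits: 0-6 directly, 7 = extra length byte(s) follow */
    int encoded_match_len;  /* M bits: 0-14 directly, 15 = extra length byte(s) follow */
} lzsa1_token;

static lzsa1_token lzsa1_parse_token(uint8_t token) {
    lzsa1_token t;
    t.long_offset       = (token >> 7) & 0x01;  /* bit 7 */
    t.literals_length   = (token >> 4) & 0x07;  /* bits 4-6 */
    t.encoded_match_len =  token       & 0x0f;  /* bits 0-3 */
    return t;
}
```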
**optional extra literal length**
If the literals length is 7 or more, the 'L' bits in the token form the value 7, and an extra byte follows here, with three possible types of value:
* 0-248: the value is added to the 7 stored in the token, to compose the final literals length. For instance a length of 206 will be stored as 7 in the token + a single byte with the value of 199, as 7 + 199 = 206.
* 250: a second byte follows. The final literals value is 256 + the second byte. For instance, a literals length of 499 is encoded as 7 in the token, a byte with the value of 250, and a final byte with the value of 243, as 256 + 243 = 499.
* 249: a second and third byte follow, forming a little-endian 16-bit value. The final literals value is that 16-bit value. For instance, a literals length of 1024 is stored as 7 in the token, then byte values of 249, 0 and 4, as (4 * 256) = 1024.
The extension byte values are chosen so that all three cases can be detected on 8-bit CPUs with a simple addition and overflow check.
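A possible C rendering of these three cases; the `read_byte` helper is an illustrative stand-in for whatever input routine a real decompressor uses:

```c
#include <stdint.h>

/* Hypothetical input helper: returns the next compressed byte and advances the cursor. */
static uint8_t read_byte(const uint8_t **p) { return *(*p)++; }

/* Literals length for the extended case (the token's L bits are 7). */
static unsigned lzsa1_read_literals_length(const uint8_t **p) {
    uint8_t b = read_byte(p);
    if (b < 249)
        return 7 + b;                    /* 0-248: added to the 7 from the token */
    if (b == 250)
        return 256 + read_byte(p);       /* one extra byte */
    /* b == 249: little-endian 16-bit literals length */
    unsigned lo = read_byte(p);
    unsigned hi = read_byte(p);
    return lo | (hi << 8);
}
```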
**literal values**
Literal bytes, whose number is specified by the literals length, follow here. There can be zero literals in a command.
Important note: for blocks that are part of a stream, the last command in a block ends here, as it always contains literals only. For raw blocks, the last command does contain the match offset and match length, see the note below for EOD detection.
**match offset low**
The low 8 bits of the match offset follow.
**optional match offset high**
If the 'O' bit (bit 7) is set in the token, the high 8 bits of the match offset follow, otherwise they are understood to be all set to 1. For instance, a short offset of 0x70 is interpreted as 0xff70.
**important note regarding match offsets: stored as negative values**
Note that the match offset is negative: it is added to the current decompressed location and not subtracted, in order to locate the back-reference to copy.
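Putting the two offset bytes and the negative interpretation together, a small C sketch (helper and variable names are illustrative):

```c
#include <stdint.h>

static uint8_t read_byte(const uint8_t **p) { return *(*p)++; }

/* Reconstruct the LZSA1 match offset and return the match source pointer.
 * 'long_offset' is the token's O bit; 'dst' is the current output position. */
static const uint8_t *lzsa1_match_source(const uint8_t **p, int long_offset, const uint8_t *dst) {
    uint16_t offset = read_byte(p);               /* match offset low */
    if (long_offset)
        offset |= (uint16_t)read_byte(p) << 8;    /* match offset high */
    else
        offset |= 0xff00;                         /* missing high bits are all 1s (e.g. 0x70 -> 0xff70) */
    return dst + (int16_t)offset;                 /* negative displacement added to the output position */
}
```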
**optional extra encoded match length**
If the encoded match length is 15 or more, the 'M' bits in the token form the value 15, and an extra byte follows here, with three possible types of value.
* 0-237: the value is added to the 15 stored in the token. The final value is 3 + 15 + this byte.
* 239: a second byte follows. The final match length is 256 + the second byte.
* 238: a second and third byte follow, forming a little-endian 16-bit value. The final encoded match length is that 16-bit value.
Again, the extension byte values are chosen so that all cases can be detected with a simple addition and overflow check on 8-bit CPUs.
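One way to read the extended match length in C, under the interpretation that the 16-bit form carries the full match length and that a zero 16-bit value only appears as the raw-block EOD marker described below; names are illustrative:

```c
#include <stdint.h>

static uint8_t read_byte(const uint8_t **p) { return *(*p)++; }

/* Match length for the extended case (the token's M bits are 15).
 * Returns 0 only for the raw-block EOD marker (a 16-bit length of zero). */
static unsigned lzsa1_read_match_length(const uint8_t **p) {
    uint8_t b = read_byte(p);
    if (b < 238)
        return 3 + 15 + b;               /* minimum match (3) + token value (15) + extra byte */
    if (b == 239)
        return 256 + read_byte(p);       /* one extra byte */
    /* b == 238: little-endian 16-bit match length; zero signals EOD in raw blocks */
    unsigned lo = read_byte(p);
    unsigned hi = read_byte(p);
    return lo | (hi << 8);
}
```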
# End Of Data detection for raw blocks
When the LZSA1 block is part of a stream (see StreamFormat.md), as previously mentioned, the block ends after the literal values of the last command, without a match offset or match length.
However, in a raw LZSA1 block, the last command does include a 1-byte match offset (set to zero) and a match length. The match length is encoded as a long zero: the 'M' bits in the token form the value 15, then an extra match length byte is present, with the value 238 ("two match length bytes follow"). Finally, a two-byte zero match length follows, indicating the end of the block. EOD is the only time a zero match length (which normally would indicate a copy of 3 bytes) is encoded as a large 2-byte match value. This allows the EOD test to exist in a rarely used code branch.

BlockFormat_LZSA2.md (new file, 89 lines)

@@ -0,0 +1,89 @@
# Block data format (LZSA2)
Blocks encoded as LZSA2 are composed of consecutive commands. Each command follows this format:
* token: <XYZ|LL|MMM>
* optional extra literal length
* literal values
* match offset
* optional extra encoded match length
**token**
The token byte is broken down into three parts:
7 6 5 4 3 2 1 0
X Y Z L L M M M
* L: 2-bit literals length (0-2, or 3 if extended). If the number of literals for this command is 0 to 2, the length is encoded in the token and no extra bytes are required. Otherwise, a value of 3 is encoded and extra nibbles or bytes follow as 'optional extra literal length'
* M: 3-bit encoded match length (0-6, or 7 if extended). Likewise, if the encoded match length for this command is 0 to 6, it is directly stored, otherwise 7 is stored and extra nibbles or bytes follow as 'optional extra encoded match length'. Except for the last command in a block, a command always contains a match, so the encoded match length is the actual match length offset by the minimum, which is 2 bytes. For instance, an actual match length of 5 bytes to be copied is encoded as 3.
* XYZ: 3-bit value that indicates how to decode the match offset
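As with LZSA1, a small C sketch of splitting the token; the names are illustrative only:

```c
#include <stdint.h>

/* Illustrative split of an LZSA2 token byte <XYZ|LL|MMM>. */
typedef struct {
    int xyz;                /* bits 5-7: match offset mode */
    int literals_length;    /* bits 3-4: 0-2 directly, 3 = extra nibble/byte(s) follow */
    int encoded_match_len;  /* bits 0-2: 0-6 directly, 7 = extra nibble/byte(s) follow */
} lzsa2_token;

static lzsa2_token lzsa2_parse_token(uint8_t token) {
    lzsa2_token t;
    t.xyz               = (token >> 5) & 0x07;
    t.literals_length   = (token >> 3) & 0x03;
    t.encoded_match_len =  token       & 0x07;
    return t;
}
```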
**optional extra literal length**
If the literals length is 3 or more, the 'L' bits in the token form the value 3, and an extra nibble is read:
* 0-14: the value is added to the 3 stored in the token, to compose the final literals length.
* 15: an extra byte follows
If an extra byte follows, it can have two possible types of value:
* 0-237: 18 is added to the value (3 from the token + 15 from the nibble), to compose the final literals length. For instance a length of 206 will be stored as 3 in the token + a nibble with the value of 15 + a single byte with the value of 188.
* 239: a second and third byte follow, forming a little-endian 16-bit value. The final literals value is that 16-bit value. For instance, a literals length of 1027 is stored as 3 in the token, a nibble with the value of 15, then byte values of 239, 3 and 4, as 3 + (4 * 256) = 1027.
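A sketch of the same rules in C; `read_byte` and `read_nibble` are illustrative helpers (the nibble reader is the one described in the "Reading nibbles" section at the end of this document):

```c
#include <stdint.h>

static uint8_t read_byte(const uint8_t **p) { return *(*p)++; }
unsigned read_nibble(const uint8_t **p);   /* assumed helper, see "Reading nibbles" below */

/* Literals length for the extended case (the token's L bits are 3). */
static unsigned lzsa2_read_literals_length(const uint8_t **p) {
    unsigned nibble = read_nibble(p);
    if (nibble < 15)
        return 3 + nibble;               /* 0-14: added to the 3 from the token */
    uint8_t b = read_byte(p);
    if (b < 238)
        return 18 + b;                   /* 3 (token) + 15 (nibble) + extra byte */
    /* b == 239 (238 is never emitted, see the EOD note below): 16-bit literals length */
    unsigned lo = read_byte(p);
    unsigned hi = read_byte(p);
    return lo | (hi << 8);
}
```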
**literal values**
Literal bytes, whose number is specified by the literals length, follow here. There can be zero literals in a command.
Important note: for blocks that are part of a stream, the last command in a block ends here, as it always contains literals only. For raw blocks, the last command does contain the match offset and match length, see the note below for EOD detection.
**match offset**
The match offset is decoded according to the XYZ bits in the token
XYZ
00Z 5-bit offset: read a nibble for offset bits 1-4 and use the inverted bit Z of the token as bit 0 of the offset. set bits 5-15 of the offset to 1.
01Z 9-bit offset: read a byte for offset bits 0-7 and use the inverted bit Z for bit 8 of the offset. set bits 9-15 of the offset to 1.
10Z 13-bit offset: read a nibble for offset bits 9-12 and use the inverted bit Z for bit 8 of the offset, then read a byte for offset bits 0-7. set bits 13-15 of the offset to 1. subtract 512 from the offset to get the final value.
110 16-bit offset: read a byte for offset bits 8-15, then another byte for offset bits 0-7.
111 repeat offset: reuse the offset value of the previous match command.
The bit ordering and inversion helps optimize the decoder for size and speed on 8-bit CPUs.
**important note regarding match offsets: stored as negative values**
Note that the match offset is negative: it is added to the current decompressed location and not subtracted, in order to locate the back-reference to copy. For this reason, as already indicated, unexpressed offset bits are set to 1 instead of 0.
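A possible C decoding of the five offset modes, following the table above; the helpers and the `prev_offset` parameter (for the repeat-offset case) are illustrative:

```c
#include <stdint.h>

static uint8_t read_byte(const uint8_t **p) { return *(*p)++; }
unsigned read_nibble(const uint8_t **p);   /* assumed helper, see "Reading nibbles" below */

/* Decode the match offset selected by the token's XYZ bits. The result is a
 * negative 16-bit displacement to add to the current output position. */
static uint16_t lzsa2_read_match_offset(const uint8_t **p, uint8_t token, uint16_t prev_offset) {
    unsigned z_inv = ((token >> 5) & 1) ^ 1;      /* inverted Z bit of the token */
    uint16_t offset;

    switch ((token >> 6) & 3) {                   /* X and Y bits */
    case 0:                                       /* 00Z: 5-bit offset (-1..-32) */
        offset = 0xffe0 | (read_nibble(p) << 1) | z_inv;
        break;
    case 1:                                       /* 01Z: 9-bit offset */
        offset = 0xfe00 | (z_inv << 8) | read_byte(p);
        break;
    case 2:                                       /* 10Z: 13-bit offset */
        offset  = 0xe000 | (read_nibble(p) << 9) | (z_inv << 8);
        offset |= read_byte(p);                   /* offset bits 0-7 */
        offset -= 512;
        break;
    default:
        if (token & 0x20) {                       /* 111: reuse the previous match offset */
            offset = prev_offset;
        } else {                                  /* 110: 16-bit offset, high byte first */
            offset  = (uint16_t)read_byte(p) << 8;
            offset |= read_byte(p);
        }
        break;
    }
    return offset;
}
```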
**optional extra encoded match length**
If the encoded match length is 7 or more, the 'M' bits in the token form the value 7, and an extra nibble is read:
* 0-14: the value is added to the 7 stored in the token, and then the minmatch of 2 is added, to compose the final match length.
* 15: an extra byte follows
If an extra byte follows here, it can have two possible types of value:
* 0-231: 24 is added to the value (7 from the token + 15 from the nibble + minmatch of 2), to compose the final match length. For instance a length of 150 will be stored as 7 in the token + a nibble with the value of 15 + a single byte with the value of 126.
* 233: a second and third byte follow, forming a little-endian 16-bit value. The final encoded match length is that 16-bit value.
# End Of Data detection for raw blocks
When the LZSA2 block is part of a stream (see StreamFormat.md), as previously mentioned, the block ends after the literal values of the last command, without a match offset or match length.
However, in a raw LZSA2 block, the last command does include a 9-bit match offset (set to zero, to be ignored) and an EOD marker as the match length. The EOD match length marker is encoded as follows: the 'M' bits in the token form the value 7, then a nibble with the value of 15 is present, then a single extra match length byte with the value of 232, indicating the end of the block. This allows the EOD test to exist in a rarely used code branch.
The EOD condition can be easily checked as part of the tri-state condition when handling long matches. When 24 is added to the match byte value:
- If the byte doesn't overflow, the final match length is ready
- If the byte overflows and equals zero, the EOD marker has been hit
- Otherwise, if the byte overflows and doesn't equal zero, a 16-bit match length must be read.
This tri-state test translates to only an addition and two branches on 8-bit CPUs.
The equivalent EOD condition in literal lengths (which would be byte 238, that would overflow to exactly 0 when adding 18) is never emitted, so for size-optimized decompressors, the same code can be used to read both types of lengths.
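A C sketch of the extended match length read, including this tri-state EOD check; the sentinel value and helper names are illustrative, and the 16-bit form is taken to carry the full match length:

```c
#include <stdint.h>

static uint8_t read_byte(const uint8_t **p) { return *(*p)++; }
unsigned read_nibble(const uint8_t **p);   /* assumed helper, see "Reading nibbles" below */

#define LZSA2_EOD 0u   /* illustrative sentinel; zero is never a valid match length here */

/* Match length for the extended case (the token's M bits are 7). */
static unsigned lzsa2_read_match_length(const uint8_t **p) {
    unsigned nibble = read_nibble(p);
    if (nibble < 15)
        return 2 + 7 + nibble;           /* minmatch (2) + token value (7) + nibble */
    unsigned sum = 24 + read_byte(p);    /* the "add 24" step described above */
    if (sum <= 255)
        return sum;                      /* no overflow: final match length */
    if ((sum & 0xff) == 0)
        return LZSA2_EOD;                /* overflowed to zero: EOD marker (extra byte 232) */
    /* overflowed, non-zero (extra byte 233): little-endian 16-bit match length */
    unsigned lo = read_byte(p);
    unsigned hi = read_byte(p);
    return lo | (hi << 8);
}
```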
# Reading nibbles
When the specification indicates that a nibble (4 bit value) must be read:
* If there are no nibbles ready, read a byte immediately. Return the high 4 bits (bits 4-7) as the nibble and store the low 4 bits for later. Flag that a nibble is ready for next time.
* If a nibble is ready, return the previously stored low 4 bits (bits 0-3) and flag that no nibble is ready for next time.
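A minimal C version of these two rules; the single static flag/value pair assumes one decompression stream at a time and is purely illustrative:

```c
#include <stdint.h>

static int     nibble_ready = 0;   /* "a nibble is ready" flag from the rules above */
static uint8_t nibble_saved = 0;   /* the stored low 4 bits */

unsigned read_nibble(const uint8_t **p) {
    if (!nibble_ready) {
        uint8_t b = *(*p)++;       /* no nibble ready: read a byte immediately */
        nibble_saved = b & 0x0f;   /* keep the low 4 bits for later */
        nibble_ready = 1;
        return b >> 4;             /* return the high 4 bits now */
    }
    nibble_ready = 0;              /* hand out the previously stored low 4 bits */
    return nibble_saved;
}
```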


@@ -1,3 +1,3 @@
The LZSA code is available under the Zlib license, except for src/shrink.c which is placed under the Creative Commons CC0 license.
The LZSA code is available under the Zlib license, except for src/matchfinder.c which is placed under the Creative Commons CC0 license.
Please consult LICENSE.zlib.md and LICENSE.CC0.md for more information.


@@ -1,5 +1,5 @@
CC=clang
CFLAGS=-O3 -fomit-frame-pointer -Isrc/libdivsufsort/include -Isrc -DHAVE_CONFIG_H
CFLAGS=-O3 -fomit-frame-pointer -Isrc/libdivsufsort/include -Isrc
OBJDIR=obj
LDFLAGS=
STRIP=strip
@@ -10,13 +10,25 @@ $(OBJDIR)/%.o: src/../%.c
APP := lzsa
OBJS := $(OBJDIR)/src/main.o
OBJS += $(OBJDIR)/src/shrink.o
OBJS += $(OBJDIR)/src/expand.o
OBJS += $(OBJDIR)/src/lzsa.o
OBJS += $(OBJDIR)/src/dictionary.o
OBJS += $(OBJDIR)/src/expand_block_v1.o
OBJS += $(OBJDIR)/src/expand_block_v2.o
OBJS += $(OBJDIR)/src/expand_context.o
OBJS += $(OBJDIR)/src/expand_inmem.o
OBJS += $(OBJDIR)/src/expand_streaming.o
OBJS += $(OBJDIR)/src/frame.o
OBJS += $(OBJDIR)/src/matchfinder.o
OBJS += $(OBJDIR)/src/shrink_block_v1.o
OBJS += $(OBJDIR)/src/shrink_block_v2.o
OBJS += $(OBJDIR)/src/shrink_context.o
OBJS += $(OBJDIR)/src/shrink_inmem.o
OBJS += $(OBJDIR)/src/shrink_streaming.o
OBJS += $(OBJDIR)/src/stream.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort_utils.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/sssort.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/trsort.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/utils.o
all: $(APP)

README.md (170 lines changed)

@@ -1,132 +1,94 @@
LZSA is a byte-aligned compression format that is specifically engineered for very fast decompression on 8-bit systems. It can compress files of any size by using blocks of a maximum size of 64 Kb with block-interdependent compression and up to 64 Kb of back-references for matches.
LZSA is a collection of byte-aligned compression formats that are specifically engineered for very fast decompression on 8-bit systems. It can compress files of any size by using blocks of a maximum size of 64 Kb with block-interdependent compression and up to 64 Kb of back-references for matches.
![Pareto frontier](pareto_graph.png)
<sup>*ZX Spectrum</sup>
Check out [The Hollow](https://www.pouet.net/prod.php?which=81909) by Darklite and Offense, winner of the Solskogen 2019 wild compo, that uses LZSA on Z80.
[Gabba](https://www.pouet.net/prod.php?which=83539) by Stardust ranked 2nd in the ZX Spectrum demo compo at CAFe demoparty 2019 and also used LZSA on Z80.
[Myst Demake](http://www.deater.net/weave/vmwprod/mist/) for the Apple II by Vince Weaver, uses LZSA on 6502.
The 8 bit guy's [Commander X16 ROM](https://github.com/commanderx16/x16-rom) uses LZSA on 6502 as well.
[RomWBW](https://github.com/wwarthen/RomWBW) uses LZSA on Z80 for a variety of hobbyist computers.
The LZSA compression tool uses an aggressive optimal packing strategy to try to find the sequence of commands that gives the smallest packed file that decompresses to the original while maintaining the maximum possible decompression speed.
The compression formats give the user choices that range from decompressing faster than LZ4 on 8-bit systems with better compression, to compressing as well as ZX7 with much better decompression speed. LZSA1 is designed to replace LZ4 and LZSA2 to replace ZX7, in 8-bit scenarios.
Compression ratio comparison between LZSA and other optimal packers, for a workload composed of ZX Spectrum and C64 files:
ZX7 57,36% (entropy coding)
LZ5 1.4.1 59,82%
LZSA 60,84% <------ (single byte stream)
Lizard -29 64,14% (rep-match, 4 byte streams)
LZ4_HC -19 -B4 -BD 64,5% (single byte stream)
Uncompressed 100%
Bytes Ratio Decompression speed vs. LZ4
LZSA2 676681 52,49% <------ 75%
MegaLZ 4.89 679041 52,68% Not measured
ZX7 687133 53,30% 47,73%
LZ5 1.4.1 727107 56,40% 75%
LZSA1 735785 57,08% <------ 90%
Lizard -29 776122 60,21% Not measured
LZ4_HC -19 -B4 -BD 781049 60,59% 100%
Uncompressed 1289127 100% N/A
Performance over well-known compression corpus files:
Uncompressed LZ4_HC -19 -B4 -BD LZSA
Canterbury 2810784 935827 (33,29%) 855284 (30,43%)
Silesia 211938580 77299725 (36,47%) 73793860 (34,82%)
Calgary 3251493 1248780 (38,40%) 1196809 (36,80%)
Large 11159482 3771025 (33,79%) 3648987 (32,70%)
enwik9 1000000000 371841591 (37,18%) 355427361 (35,54%)
As an example of LZSA's simplicity, a size-optimized decompressor on 8088 has been implemented in 91 bytes.
Uncompressed LZ4_HC -19 -B4 -BD LZSA1 LZSA2
Canterbury 2810784 935827 (33,29%) 850792 (30,27%) 770877 (27,43%)
Silesia 211938580 77299725 (36,47%) 73706340 (34,78%) 68928564 (32,52%)
Calgary 3251493 1248780 (38,40%) 1192123 (36,67%) 1110290 (34,15%)
Large 11159482 3771025 (33,79%) 3648393 (32,69%) 3519480 (31,54%)
enwik9 1000000000 371841591 (37,18%) 355360043 (35,54%) 334900611 (33,49%)
As an example of LZSA1's simplicity, a size-optimized decompressor on Z80 has been implemented in 67 bytes.
The compressor is approximately 2X slower than LZ4_HC but compresses better while maintaining similar decompression speeds and decompressor simplicity.
The main differences with the LZ4 compression format are:
The main differences between LZSA1 and the LZ4 compression format are:
* The use of short (8-bit) match offsets where possible. The match-finder and optimizer cooperate to try and use the shortest match offsets possible.
* Shorter encoding of lengths. As blocks are maximum 64 Kb in size, lengths can only be up to 64 Kb.
* As a result of the smaller commands due to the possibly shorter match offsets, a minimum match size of 3 bytes instead of 4. The use of small matches is driven by the optimizer, and used where they provide gains.
Inspirations:
As for LZSA2:
* 5-bit, 9-bit, 13-bit and 16-bit match offsets, using nibble encoding
* Rep-matches
* Shorter encoding of lengths, also using nibbles
* A minmatch of 2 bytes
* No (slow) bit-packing. LZSA2 uses byte alignment in the hot path, and nibbles.
* [LZ4](https://github.com/lz4/lz4) by Yann Collet.
* [LZ5/Lizard](https://github.com/inikep/lizard) by Przemyslaw Skibinski and Yann Collet.
Inspirations:
* [LZ4](https://github.com/lz4/lz4) by Yann Collet.
* [LZ5/Lizard](https://github.com/inikep/lizard) by Przemyslaw Skibinski and Yann Collet.
* The suffix array intervals in [Wimlib](https://wimlib.net/git/?p=wimlib;a=tree) by Eric Biggers.
* ZX7 by Einar Saukas
* [apc](https://github.com/svendahl/cap) by Sven-Åke Dahl
* [Charles Bloom](http://cbloomrants.blogspot.com/)'s compression blog
License:
* The LZSA code is available under the Zlib license.
* The compressor (shrink.c) is available under the CC0 license due to using portions of code from Eric Bigger's Wimlib in the suffix array-based matchfinder.
* The LZSA code is available under the Zlib license.
* The match finder (matchfinder.c) is available under the CC0 license due to using portions of code from Eric Bigger's Wimlib in the suffix array-based matchfinder.
# Stream format
8-bit assembly code:
The stream format is composed of:
* a header
* one or more frames
* a footer
* Z80 decompressors (size- and speed-optimized) written by [introspec](https://github.com/specke) with optimizations by [uniabis](https://github.com/uniabis)
* 6502 and 8088 size-optimized improvements by [Peter Ferrie](https://github.com/peterferrie)
* 6502 speed-optimized decompressor by [John Brandwood](https://github.com/jbrandwood)
* 8088 speed-optimized decompressor by [Jim Leonard](https://github.com/mobygamer)
* 6809 decompressors (Tandy CoCo, Thomson MO/TO, Dragon 32/64..) optimized by [Doug Masten](https://github.com/dougmasten)
# Header format
External links:
The 3-bytes header contains a signature and a traits byte:
* [i8080 decompressors](https://gitlab.com/ivagor/lzsa8080/tree/master) by Ivan Gorodetsky
* [PDP-11 decompressors](https://gitlab.com/ivagor/lzsa8080/tree/master/PDP11) also by Ivan Gorodetsky
* [MC68000 decompressors](https://github.com/tattlemuss/lz4-m68k/blob/master/src/lzsa.s) by Steven Tattersall
* [Gameboy decompressors](https://github.com/meltycode) by Meltycode, based on the Z80 code by introspec
* LZSA's page on [Pouet](https://www.pouet.net/prod.php?which=81573)
0 1 2
0x7b 0x9e 0x00
<--- signature ---> <- traits ->
# Compressed format
The traits are set to 0x00 for this version of the format.
Decompression code is provided for common 8-bit CPUs such as Z80 and 6502. However, if you would like to write your own, or understand the encoding, LZSA compresses data to a format that is fast and simple to decompress on 8-bit CPUs. It is encoded in either a stream of blocks, or as a single raw block, depending on command-line settings. The encoding is deliberately designed to avoid operations that are complicated on 8-bit CPUs (such as 16-bit math).
# Frame format
Each frame contains a 3-bytes length followed by block data that expands to up to 64 Kb of decompressed data.
0 1 2
DSZ0 DSZ1 U|DSZ2
* DSZ0 (length byte 0) contains bits 0-7 of the block data size
* DSZ1 (length byte 1) contains bits 8-15 of the block data size
* DSZ2 (bit 0 of length byte 2) contains bit 16 of the block data size
* U (bit 7 of length byte 2) is set if the block data is uncompressed, and clear if the block data is compressed.
* Bits 1..6 of length byte 2 are currently undefined and must be set to 0.
# Block data format
LZSA blocks are composed from consecutive commands. Each command follows this format:
* token: <O|LLL|MMMM>
* optional extra literal length
* literal values
* match offset low
* optional match offset high
* optional extra encoded match length
**token**
The token byte is broken down into three parts:
7 6 5 4 3 2 1 0
O L L L M M M M
* L: 3-bit literals length (0-6, or 7 if extended). If the number of literals for this command is 0 to 6, the length is encoded in the token and no extra bytes are required. Otherwise, a value of 7 is encoded and extra bytes follow as 'optional extra literal length'
* M: 4-bit encoded match length (0-14, or 15 if extended). Likewise, if the encoded match length for this command is 0 to 14, it is directly stored, otherwise 15 is stored and extra bytes follow as 'optional extra encoded match length'. Except for the last command in a block, a command always contains a match, so the encoded match length is the actual match length offset by the minimum, which is 3 bytes. For instance, an actual match length of 10 bytes to be copied, is encoded as 7.
* O: set for a 2-bytes match offset, clear for a 1-byte match offset
**optional extra literal length**
If the literals length is 7 or more, the 'L' bits in the token form the value 7, and an extra byte follows here, with three possible types of value:
* 0-253: the value is added to the 7 stored in the token, to compose the final literals length. For instance a length of 206 will be stored as 7 in the token + a single byte with the value of 199, as 7 + 199 = 206.
* 254: a second byte follows. The final literals value is 7 + 254 + the second byte. For instance, a literals length of 499 is encoded as 7 in the token, a byte with the value of 254, and a final byte with the value of 238, as 7 + 254 + 238 = 499.
* 255: a second and third byte follow, forming a little-endian 16-bit value. The final literals value is that 16-bit value. For instance, a literals length of 1024 is stored as 7 in the token, then byte values of 255, 0 and 4, as (4 * 256) = 1024.
**literal values**
Literal bytes, whose number is specified by the literals length, follow here. There can be zero literals in a command.
Important note: the last command in a block ends here, as it always contains literals only.
**match offset low**
The low 8 bits of the match offset follows.
**optional match offset high**
If the 'O' bit (bit 7) is set in the token, the high 8 bits of the match offset follow, otherwise they are understood to be all set to 0.
**important note regarding match offsets: off by 1**
Note that the match offset is *off by 1*: a value of 0 refers to the byte preceding the current output index (N-1). A value of 1 refers to two bytes before the current output index (N-2) and so on. This is so that match offsets up to 256 can be encoded as a single byte, for extra compression.
**optional extra encoded match length**
If the encoded match length is 15 or more, the 'M' bits in the token form the value 15, and an extra byte follows here, with three possible types of value.
* 0-253: the value is added to the 15 stored in the token. The final value is 3 + 15 + this byte.
* 254: a second byte follows. The final encoded match length is 15 + 254 + the second byte, which gives an actual match length of 3 + 15 + 254 + the second byte.
* 255: a second and third byte follow, forming a little-endian 16-bit value. The final encoded match length is 3 + that 16-bit value.
# Footer format
The stream ends with the EOD frame: the 3 length bytes are set to 0x00, 0x00, 0x00, and no block data follows.
* [Stream format](https://github.com/emmanuel-marty/lzsa/blob/master/StreamFormat.md)
* [Block encoding for LZSA1](https://github.com/emmanuel-marty/lzsa/blob/master/BlockFormat_LZSA1.md)
* [Block encoding for LZSA2](https://github.com/emmanuel-marty/lzsa/blob/master/BlockFormat_LZSA2.md)

StreamFormat.md (new file, 39 lines)

@@ -0,0 +1,39 @@
# Stream format
The stream format is composed of:
* a header
* one or more frames
* a footer
# Header format
The 3-byte LZSA header contains a signature and a traits byte:
0    1    2
0x7b 0x9e  7 6 5 4 3 2 1 0
           V V V Z Z Z Z Z
<--- signature ---> <- traits ->
Trait bits:
* V: 3 bit code that indicates which block data encoding is used. 0 is LZSA1 and 1 is LZSA2.
* Z: these bits in the traits are set to 0 for LZSA1 and LZSA2.
# Frame format
Each frame contains a 3-byte length followed by block data that expands to up to 64 Kb of decompressed data. The block data is encoded either as LZSA1 or LZSA2 depending on the V bits of the traits byte in the header.
0 1 2
DSZ0 DSZ1 U|DSZ2
* DSZ0 (length byte 0) contains bits 0-7 of the block data size
* DSZ1 (length byte 1) contains bits 8-15 of the block data size
* DSZ2 (bit 0 of length byte 2) contains bit 16 of the block data size
* U (bit 7 of length byte 2) is set if the block data is uncompressed, and clear if the block data is compressed.
* Bits 1..6 of length byte 2 are currently undefined and must be set to 0.
# Footer format
The stream ends with the EOD frame: the 3 length bytes are set to 0x00, 0x00, 0x00, and no block data follows.
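Putting the header, frame and footer rules together, a sketch of walking a stream in C; the callback type is invented for this example, the V bits are assumed to be the top three bits of the traits byte as in the layout above, and the block decoding itself is out of scope:

```c
#include <stdint.h>
#include <stddef.h>

/* Hypothetical per-block callback: 'format' is 0 for LZSA1, 1 for LZSA2. */
typedef void (*lzsa_block_fn)(const uint8_t *data, size_t size, int is_uncompressed, int format);

static int lzsa_walk_stream(const uint8_t *p, lzsa_block_fn handle_block) {
    if (p[0] != 0x7b || p[1] != 0x9e)
        return -1;                              /* bad signature */
    int format = (p[2] >> 5) & 0x07;            /* V bits of the traits byte (see layout above) */
    p += 3;

    for (;;) {
        if ((p[0] | p[1] | p[2]) == 0)
            return 0;                           /* EOD frame: all three length bytes are zero */
        size_t size = p[0] | ((size_t)p[1] << 8) | ((size_t)(p[2] & 0x01) << 16);
        int is_uncompressed = (p[2] & 0x80) != 0;   /* U bit of length byte 2 */
        handle_block(p + 3, size, is_uncompressed, format);
        p += 3 + size;
    }
}
```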

VS2017/.gitignore (new vendored file, 3 lines)

@@ -0,0 +1,3 @@
.vs
Debug
Release

VS2017/lzsa.sln (new executable file, 31 lines)

@@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.28307.489
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lzsa", "lzsa.vcxproj", "{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Debug|x64.ActiveCfg = Debug|x64
{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Debug|x64.Build.0 = Debug|x64
{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Debug|x86.ActiveCfg = Debug|Win32
{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Debug|x86.Build.0 = Debug|Win32
{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Release|x64.ActiveCfg = Release|x64
{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Release|x64.Build.0 = Release|x64
{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Release|x86.ActiveCfg = Release|Win32
{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {A1E1655C-AA9F-41F0-80C9-18DD0B859D7C}
EndGlobalSection
EndGlobal

225
VS2017/lzsa.vcxproj Executable file

@@ -0,0 +1,225 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>15.0</VCProjectVersion>
<ProjectGuid>{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>lzsa</RootNamespace>
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
<OutDir>$(ProjectDir)bin\</OutDir>
<TargetName>$(ProjectName)_debug</TargetName>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<OutDir>$(ProjectDir)bin\</OutDir>
<TargetName>$(ProjectName)_debug</TargetName>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
<OutDir>$(ProjectDir)bin\</OutDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<OutDir>$(ProjectDir)bin\</OutDir>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<PrecompiledHeaderFile>
</PrecompiledHeaderFile>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<AdditionalIncludeDirectories>..\src\libdivsufsort\include;..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<OutputFile>$(ProjectDir)bin\$(TargetName)$(TargetExt)</OutputFile>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<PrecompiledHeaderFile>
</PrecompiledHeaderFile>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<AdditionalIncludeDirectories>..\src\libdivsufsort\include;..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<OutputFile>$(ProjectDir)bin\$(TargetName)$(TargetExt)</OutputFile>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<PrecompiledHeaderFile>
</PrecompiledHeaderFile>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<AdditionalIncludeDirectories>..\src\libdivsufsort\include;..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<OmitFramePointers>true</OmitFramePointers>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
<OutputFile>$(ProjectDir)bin\$(TargetName)$(TargetExt)</OutputFile>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<PrecompiledHeaderFile>
</PrecompiledHeaderFile>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<AdditionalIncludeDirectories>..\src\libdivsufsort\include;..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<OmitFramePointers>true</OmitFramePointers>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
<OutputFile>$(ProjectDir)bin\$(TargetName)$(TargetExt)</OutputFile>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="..\src\dictionary.h" />
<ClInclude Include="..\src\expand_context.h" />
<ClInclude Include="..\src\expand_streaming.h" />
<ClInclude Include="..\src\expand_block_v1.h" />
<ClInclude Include="..\src\expand_block_v2.h" />
<ClInclude Include="..\src\format.h" />
<ClInclude Include="..\src\frame.h" />
<ClInclude Include="..\src\expand_inmem.h" />
<ClInclude Include="..\src\lib.h" />
<ClInclude Include="..\src\libdivsufsort\include\divsufsort_config.h" />
<ClInclude Include="..\src\libdivsufsort\include\divsufsort.h" />
<ClInclude Include="..\src\libdivsufsort\include\divsufsort_private.h" />
<ClInclude Include="..\src\matchfinder.h" />
<ClInclude Include="..\src\shrink_context.h" />
<ClInclude Include="..\src\shrink_inmem.h" />
<ClInclude Include="..\src\shrink_streaming.h" />
<ClInclude Include="..\src\shrink_block_v1.h" />
<ClInclude Include="..\src\shrink_block_v2.h" />
<ClInclude Include="..\src\stream.h" />
<ClInclude Include="pch.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\src\dictionary.c" />
<ClCompile Include="..\src\expand_context.c" />
<ClCompile Include="..\src\expand_streaming.c" />
<ClCompile Include="..\src\expand_block_v1.c" />
<ClCompile Include="..\src\expand_block_v2.c" />
<ClCompile Include="..\src\frame.c" />
<ClCompile Include="..\src\expand_inmem.c" />
<ClCompile Include="..\src\libdivsufsort\lib\divsufsort.c" />
<ClCompile Include="..\src\libdivsufsort\lib\sssort.c" />
<ClCompile Include="..\src\libdivsufsort\lib\trsort.c" />
<ClCompile Include="..\src\libdivsufsort\lib\divsufsort_utils.c" />
<ClCompile Include="..\src\lzsa.c" />
<ClCompile Include="..\src\matchfinder.c" />
<ClCompile Include="..\src\shrink_context.c" />
<ClCompile Include="..\src\shrink_inmem.c" />
<ClCompile Include="..\src\shrink_streaming.c" />
<ClCompile Include="..\src\shrink_block_v1.c" />
<ClCompile Include="..\src\shrink_block_v2.c" />
<ClCompile Include="..\src\stream.c" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

147
VS2017/lzsa.vcxproj.filters Executable file

@@ -0,0 +1,147 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Fichiers sources">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Fichiers d%27en-tête">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;ipp;xsd</Extensions>
</Filter>
<Filter Include="Fichiers de ressources">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
<Filter Include="Fichiers sources\libdivsufsort">
<UniqueIdentifier>{5ec09c0d-19f7-4a6f-b524-f405fb99e48c}</UniqueIdentifier>
</Filter>
<Filter Include="Fichiers sources\libdivsufsort\lib">
<UniqueIdentifier>{a922f475-1322-496d-8a6d-7f1c6b92423d}</UniqueIdentifier>
</Filter>
<Filter Include="Fichiers sources\libdivsufsort\include">
<UniqueIdentifier>{bd05c6e8-af92-4ab8-8916-0424cd8d186b}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="pch.h">
<Filter>Fichiers d%27en-tête</Filter>
</ClInclude>
<ClInclude Include="..\src\format.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\libdivsufsort\include\divsufsort.h">
<Filter>Fichiers sources\libdivsufsort\include</Filter>
</ClInclude>
<ClInclude Include="..\src\libdivsufsort\include\divsufsort_private.h">
<Filter>Fichiers sources\libdivsufsort\include</Filter>
</ClInclude>
<ClInclude Include="..\src\frame.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\matchfinder.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\lib.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\stream.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\expand_streaming.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\expand_inmem.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\dictionary.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\shrink_context.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\shrink_streaming.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\expand_context.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\expand_block_v1.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\expand_block_v2.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\shrink_block_v1.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\shrink_block_v2.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\shrink_inmem.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\libdivsufsort\include\divsufsort_config.h">
<Filter>Fichiers sources\libdivsufsort\include</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\src\libdivsufsort\lib\divsufsort.c">
<Filter>Fichiers sources\libdivsufsort\lib</Filter>
</ClCompile>
<ClCompile Include="..\src\libdivsufsort\lib\sssort.c">
<Filter>Fichiers sources\libdivsufsort\lib</Filter>
</ClCompile>
<ClCompile Include="..\src\libdivsufsort\lib\trsort.c">
<Filter>Fichiers sources\libdivsufsort\lib</Filter>
</ClCompile>
<ClCompile Include="..\src\frame.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\matchfinder.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\lzsa.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\stream.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\expand_streaming.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\expand_inmem.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\dictionary.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\shrink_context.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\shrink_streaming.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\expand_context.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\expand_block_v1.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\expand_block_v2.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\shrink_block_v1.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\shrink_block_v2.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\shrink_inmem.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\libdivsufsort\lib\divsufsort_utils.c">
<Filter>Fichiers sources\libdivsufsort\lib</Filter>
</ClCompile>
</ItemGroup>
</Project>

27
VS2017/lzsa.vcxproj.user Executable file

@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LocalDebuggerCommand>$(TargetPath)</LocalDebuggerCommand>
<LocalDebuggerCommandArguments>-f2 -c -v corpus/zxspectrum/graphics/bfox-dont_go_away_(2010).mg1 bfox.lzs</LocalDebuggerCommandArguments>
<DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
<LocalDebuggerWorkingDirectory>$(ProjectDir)..\</LocalDebuggerWorkingDirectory>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LocalDebuggerCommand>$(TargetPath)</LocalDebuggerCommand>
<LocalDebuggerCommandArguments>-f2 -c -v corpus/zxspectrum/graphics/bfox-dont_go_away_(2010).mg1 bfox.lzs</LocalDebuggerCommandArguments>
<DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
<LocalDebuggerWorkingDirectory>$(ProjectDir)..\</LocalDebuggerWorkingDirectory>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LocalDebuggerCommand>$(TargetPath)</LocalDebuggerCommand>
<LocalDebuggerCommandArguments>-f2 -c -v corpus/zxspectrum/graphics/bfox-dont_go_away_(2010).mg1 bfox.lzs</LocalDebuggerCommandArguments>
<DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
<LocalDebuggerWorkingDirectory>$(ProjectDir)..\</LocalDebuggerWorkingDirectory>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LocalDebuggerCommand>$(TargetPath)</LocalDebuggerCommand>
<LocalDebuggerCommandArguments>-f2 -c -v corpus/zxspectrum/graphics/bfox-dont_go_away_(2010).mg1 bfox.lzs</LocalDebuggerCommandArguments>
<DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
<LocalDebuggerWorkingDirectory>$(ProjectDir)..\</LocalDebuggerWorkingDirectory>
</PropertyGroup>
</Project>


@@ -0,0 +1,429 @@
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 50;
objects = {
/* Begin PBXBuildFile section */
0CADC63122AAD8EB003E9821 /* shrink_inmem.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC5EE22AAD8EA003E9821 /* shrink_inmem.c */; };
0CADC63222AAD8EB003E9821 /* frame.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC5F322AAD8EB003E9821 /* frame.c */; };
0CADC63322AAD8EB003E9821 /* matchfinder.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC5F422AAD8EB003E9821 /* matchfinder.c */; };
0CADC63422AAD8EB003E9821 /* shrink_block_v1.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC5FA22AAD8EB003E9821 /* shrink_block_v1.c */; };
0CADC63A22AAD8EB003E9821 /* trsort.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC61622AAD8EB003E9821 /* trsort.c */; };
0CADC63B22AAD8EB003E9821 /* divsufsort.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC61722AAD8EB003E9821 /* divsufsort.c */; };
0CADC63D22AAD8EB003E9821 /* sssort.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC61922AAD8EB003E9821 /* sssort.c */; };
0CADC63E22AAD8EB003E9821 /* expand_block_v1.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62122AAD8EB003E9821 /* expand_block_v1.c */; };
0CADC63F22AAD8EB003E9821 /* lzsa.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62222AAD8EB003E9821 /* lzsa.c */; };
0CADC64022AAD8EB003E9821 /* shrink_streaming.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62322AAD8EB003E9821 /* shrink_streaming.c */; };
0CADC64122AAD8EB003E9821 /* expand_inmem.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62522AAD8EB003E9821 /* expand_inmem.c */; };
0CADC64222AAD8EB003E9821 /* stream.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62922AAD8EB003E9821 /* stream.c */; };
0CADC64322AAD8EB003E9821 /* expand_block_v2.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62A22AAD8EB003E9821 /* expand_block_v2.c */; };
0CADC64422AAD8EB003E9821 /* shrink_context.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62B22AAD8EB003E9821 /* shrink_context.c */; };
0CADC64522AAD8EB003E9821 /* expand_streaming.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62D22AAD8EB003E9821 /* expand_streaming.c */; };
0CADC64622AAD8EB003E9821 /* dictionary.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62E22AAD8EB003E9821 /* dictionary.c */; };
0CADC64722AAD8EB003E9821 /* expand_context.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62F22AAD8EB003E9821 /* expand_context.c */; };
0CADC64822AAD8EB003E9821 /* shrink_block_v2.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC63022AAD8EB003E9821 /* shrink_block_v2.c */; };
0CADC64A22AB8DAD003E9821 /* divsufsort_utils.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC64922AB8DAD003E9821 /* divsufsort_utils.c */; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
0CADC57622A65EA4003E9821 /* CopyFiles */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = /usr/share/man/man1/;
dstSubfolderSpec = 0;
files = (
);
runOnlyForDeploymentPostprocessing = 1;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
0CADC57822A65EA5003E9821 /* lzsa */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = lzsa; sourceTree = BUILT_PRODUCTS_DIR; };
0CADC5ED22AAD8EA003E9821 /* expand_streaming.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = expand_streaming.h; path = ../../src/expand_streaming.h; sourceTree = "<group>"; };
0CADC5EE22AAD8EA003E9821 /* shrink_inmem.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = shrink_inmem.c; path = ../../src/shrink_inmem.c; sourceTree = "<group>"; };
0CADC5EF22AAD8EB003E9821 /* stream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = stream.h; path = ../../src/stream.h; sourceTree = "<group>"; };
0CADC5F022AAD8EB003E9821 /* expand_block_v1.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = expand_block_v1.h; path = ../../src/expand_block_v1.h; sourceTree = "<group>"; };
0CADC5F122AAD8EB003E9821 /* shrink_block_v1.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shrink_block_v1.h; path = ../../src/shrink_block_v1.h; sourceTree = "<group>"; };
0CADC5F222AAD8EB003E9821 /* lib.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = lib.h; path = ../../src/lib.h; sourceTree = "<group>"; };
0CADC5F322AAD8EB003E9821 /* frame.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = frame.c; path = ../../src/frame.c; sourceTree = "<group>"; };
0CADC5F422AAD8EB003E9821 /* matchfinder.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = matchfinder.c; path = ../../src/matchfinder.c; sourceTree = "<group>"; };
0CADC5F522AAD8EB003E9821 /* matchfinder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = matchfinder.h; path = ../../src/matchfinder.h; sourceTree = "<group>"; };
0CADC5F622AAD8EB003E9821 /* dictionary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = dictionary.h; path = ../../src/dictionary.h; sourceTree = "<group>"; };
0CADC5F722AAD8EB003E9821 /* shrink_context.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shrink_context.h; path = ../../src/shrink_context.h; sourceTree = "<group>"; };
0CADC5F822AAD8EB003E9821 /* shrink_inmem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shrink_inmem.h; path = ../../src/shrink_inmem.h; sourceTree = "<group>"; };
0CADC5F922AAD8EB003E9821 /* expand_block_v2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = expand_block_v2.h; path = ../../src/expand_block_v2.h; sourceTree = "<group>"; };
0CADC5FA22AAD8EB003E9821 /* shrink_block_v1.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = shrink_block_v1.c; path = ../../src/shrink_block_v1.c; sourceTree = "<group>"; };
0CADC5FB22AAD8EB003E9821 /* expand_context.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = expand_context.h; path = ../../src/expand_context.h; sourceTree = "<group>"; };
0CADC60922AAD8EB003E9821 /* divsufsort_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort_private.h; sourceTree = "<group>"; };
0CADC60A22AAD8EB003E9821 /* divsufsort.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort.h; sourceTree = "<group>"; };
0CADC61622AAD8EB003E9821 /* trsort.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = trsort.c; sourceTree = "<group>"; };
0CADC61722AAD8EB003E9821 /* divsufsort.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = divsufsort.c; sourceTree = "<group>"; };
0CADC61922AAD8EB003E9821 /* sssort.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = sssort.c; sourceTree = "<group>"; };
0CADC62122AAD8EB003E9821 /* expand_block_v1.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = expand_block_v1.c; path = ../../src/expand_block_v1.c; sourceTree = "<group>"; };
0CADC62222AAD8EB003E9821 /* lzsa.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = lzsa.c; path = ../../src/lzsa.c; sourceTree = "<group>"; };
0CADC62322AAD8EB003E9821 /* shrink_streaming.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = shrink_streaming.c; path = ../../src/shrink_streaming.c; sourceTree = "<group>"; };
0CADC62422AAD8EB003E9821 /* format.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = format.h; path = ../../src/format.h; sourceTree = "<group>"; };
0CADC62522AAD8EB003E9821 /* expand_inmem.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = expand_inmem.c; path = ../../src/expand_inmem.c; sourceTree = "<group>"; };
0CADC62622AAD8EB003E9821 /* shrink_block_v2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shrink_block_v2.h; path = ../../src/shrink_block_v2.h; sourceTree = "<group>"; };
0CADC62722AAD8EB003E9821 /* expand_inmem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = expand_inmem.h; path = ../../src/expand_inmem.h; sourceTree = "<group>"; };
0CADC62822AAD8EB003E9821 /* shrink_streaming.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shrink_streaming.h; path = ../../src/shrink_streaming.h; sourceTree = "<group>"; };
0CADC62922AAD8EB003E9821 /* stream.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = stream.c; path = ../../src/stream.c; sourceTree = "<group>"; };
0CADC62A22AAD8EB003E9821 /* expand_block_v2.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = expand_block_v2.c; path = ../../src/expand_block_v2.c; sourceTree = "<group>"; };
0CADC62B22AAD8EB003E9821 /* shrink_context.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = shrink_context.c; path = ../../src/shrink_context.c; sourceTree = "<group>"; };
0CADC62C22AAD8EB003E9821 /* frame.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = frame.h; path = ../../src/frame.h; sourceTree = "<group>"; };
0CADC62D22AAD8EB003E9821 /* expand_streaming.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = expand_streaming.c; path = ../../src/expand_streaming.c; sourceTree = "<group>"; };
0CADC62E22AAD8EB003E9821 /* dictionary.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = dictionary.c; path = ../../src/dictionary.c; sourceTree = "<group>"; };
0CADC62F22AAD8EB003E9821 /* expand_context.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = expand_context.c; path = ../../src/expand_context.c; sourceTree = "<group>"; };
0CADC63022AAD8EB003E9821 /* shrink_block_v2.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = shrink_block_v2.c; path = ../../src/shrink_block_v2.c; sourceTree = "<group>"; };
0CADC64922AB8DAD003E9821 /* divsufsort_utils.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = divsufsort_utils.c; sourceTree = "<group>"; };
0CADC64B22AB8DC3003E9821 /* divsufsort_config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort_config.h; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
0CADC57522A65EA4003E9821 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
0CADC56F22A65EA4003E9821 = {
isa = PBXGroup;
children = (
0CADC57A22A65EA5003E9821 /* lzsa */,
0CADC57922A65EA5003E9821 /* Products */,
);
sourceTree = "<group>";
};
0CADC57922A65EA5003E9821 /* Products */ = {
isa = PBXGroup;
children = (
0CADC57822A65EA5003E9821 /* lzsa */,
);
name = Products;
sourceTree = "<group>";
};
0CADC57A22A65EA5003E9821 /* lzsa */ = {
isa = PBXGroup;
children = (
0CADC62E22AAD8EB003E9821 /* dictionary.c */,
0CADC5F622AAD8EB003E9821 /* dictionary.h */,
0CADC62122AAD8EB003E9821 /* expand_block_v1.c */,
0CADC5F022AAD8EB003E9821 /* expand_block_v1.h */,
0CADC62A22AAD8EB003E9821 /* expand_block_v2.c */,
0CADC5F922AAD8EB003E9821 /* expand_block_v2.h */,
0CADC62F22AAD8EB003E9821 /* expand_context.c */,
0CADC5FB22AAD8EB003E9821 /* expand_context.h */,
0CADC62522AAD8EB003E9821 /* expand_inmem.c */,
0CADC62722AAD8EB003E9821 /* expand_inmem.h */,
0CADC62D22AAD8EB003E9821 /* expand_streaming.c */,
0CADC5ED22AAD8EA003E9821 /* expand_streaming.h */,
0CADC62422AAD8EB003E9821 /* format.h */,
0CADC5F322AAD8EB003E9821 /* frame.c */,
0CADC62C22AAD8EB003E9821 /* frame.h */,
0CADC5F222AAD8EB003E9821 /* lib.h */,
0CADC5FC22AAD8EB003E9821 /* libdivsufsort */,
0CADC62222AAD8EB003E9821 /* lzsa.c */,
0CADC5F422AAD8EB003E9821 /* matchfinder.c */,
0CADC5F522AAD8EB003E9821 /* matchfinder.h */,
0CADC5FA22AAD8EB003E9821 /* shrink_block_v1.c */,
0CADC5F122AAD8EB003E9821 /* shrink_block_v1.h */,
0CADC63022AAD8EB003E9821 /* shrink_block_v2.c */,
0CADC62622AAD8EB003E9821 /* shrink_block_v2.h */,
0CADC62B22AAD8EB003E9821 /* shrink_context.c */,
0CADC5F722AAD8EB003E9821 /* shrink_context.h */,
0CADC5EE22AAD8EA003E9821 /* shrink_inmem.c */,
0CADC5F822AAD8EB003E9821 /* shrink_inmem.h */,
0CADC62322AAD8EB003E9821 /* shrink_streaming.c */,
0CADC62822AAD8EB003E9821 /* shrink_streaming.h */,
0CADC62922AAD8EB003E9821 /* stream.c */,
0CADC5EF22AAD8EB003E9821 /* stream.h */,
);
path = lzsa;
sourceTree = "<group>";
};
0CADC5FC22AAD8EB003E9821 /* libdivsufsort */ = {
isa = PBXGroup;
children = (
0CADC60322AAD8EB003E9821 /* include */,
0CADC61422AAD8EB003E9821 /* lib */,
);
name = libdivsufsort;
path = ../../src/libdivsufsort;
sourceTree = "<group>";
};
0CADC60322AAD8EB003E9821 /* include */ = {
isa = PBXGroup;
children = (
0CADC64B22AB8DC3003E9821 /* divsufsort_config.h */,
0CADC60922AAD8EB003E9821 /* divsufsort_private.h */,
0CADC60A22AAD8EB003E9821 /* divsufsort.h */,
);
path = include;
sourceTree = "<group>";
};
0CADC61422AAD8EB003E9821 /* lib */ = {
isa = PBXGroup;
children = (
0CADC64922AB8DAD003E9821 /* divsufsort_utils.c */,
0CADC61622AAD8EB003E9821 /* trsort.c */,
0CADC61722AAD8EB003E9821 /* divsufsort.c */,
0CADC61922AAD8EB003E9821 /* sssort.c */,
);
path = lib;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
0CADC57722A65EA4003E9821 /* lzsa */ = {
isa = PBXNativeTarget;
buildConfigurationList = 0CADC57F22A65EA5003E9821 /* Build configuration list for PBXNativeTarget "lzsa" */;
buildPhases = (
0CADC57422A65EA4003E9821 /* Sources */,
0CADC57522A65EA4003E9821 /* Frameworks */,
0CADC57622A65EA4003E9821 /* CopyFiles */,
);
buildRules = (
);
dependencies = (
);
name = lzsa;
productName = lzsa;
productReference = 0CADC57822A65EA5003E9821 /* lzsa */;
productType = "com.apple.product-type.tool";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
0CADC57022A65EA4003E9821 /* Project object */ = {
isa = PBXProject;
attributes = {
LastUpgradeCheck = 1020;
ORGANIZATIONNAME = Emmanuel;
TargetAttributes = {
0CADC57722A65EA4003E9821 = {
CreatedOnToolsVersion = 10.2.1;
};
};
};
buildConfigurationList = 0CADC57322A65EA4003E9821 /* Build configuration list for PBXProject "lzsa" */;
compatibilityVersion = "Xcode 9.3";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
);
mainGroup = 0CADC56F22A65EA4003E9821;
productRefGroup = 0CADC57922A65EA5003E9821 /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
0CADC57722A65EA4003E9821 /* lzsa */,
);
};
/* End PBXProject section */
/* Begin PBXSourcesBuildPhase section */
0CADC57422A65EA4003E9821 /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
0CADC64822AAD8EB003E9821 /* shrink_block_v2.c in Sources */,
0CADC63D22AAD8EB003E9821 /* sssort.c in Sources */,
0CADC64322AAD8EB003E9821 /* expand_block_v2.c in Sources */,
0CADC63F22AAD8EB003E9821 /* lzsa.c in Sources */,
0CADC64422AAD8EB003E9821 /* shrink_context.c in Sources */,
0CADC64522AAD8EB003E9821 /* expand_streaming.c in Sources */,
0CADC63E22AAD8EB003E9821 /* expand_block_v1.c in Sources */,
0CADC63122AAD8EB003E9821 /* shrink_inmem.c in Sources */,
0CADC63B22AAD8EB003E9821 /* divsufsort.c in Sources */,
0CADC64622AAD8EB003E9821 /* dictionary.c in Sources */,
0CADC63422AAD8EB003E9821 /* shrink_block_v1.c in Sources */,
0CADC64A22AB8DAD003E9821 /* divsufsort_utils.c in Sources */,
0CADC64222AAD8EB003E9821 /* stream.c in Sources */,
0CADC64022AAD8EB003E9821 /* shrink_streaming.c in Sources */,
0CADC63A22AAD8EB003E9821 /* trsort.c in Sources */,
0CADC64122AAD8EB003E9821 /* expand_inmem.c in Sources */,
0CADC63322AAD8EB003E9821 /* matchfinder.c in Sources */,
0CADC64722AAD8EB003E9821 /* expand_context.c in Sources */,
0CADC63222AAD8EB003E9821 /* frame.c in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
0CADC57D22A65EA5003E9821 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
CODE_SIGN_IDENTITY = "-";
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
GCC_C_LANGUAGE_STANDARD = c99;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (
../src/libdivsufsort/include,
../src/xxhash,
../src,
);
LLVM_LTO = YES;
MACOSX_DEPLOYMENT_TARGET = 10.8;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
OTHER_CFLAGS = "";
SDKROOT = macosx;
};
name = Debug;
};
0CADC57E22A65EA5003E9821 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
CODE_SIGN_IDENTITY = "-";
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_C_LANGUAGE_STANDARD = c99;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 3;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (
../src/libdivsufsort/include,
../src/xxhash,
../src,
);
LLVM_LTO = YES;
MACOSX_DEPLOYMENT_TARGET = 10.8;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
OTHER_CFLAGS = "";
SDKROOT = macosx;
};
name = Release;
};
0CADC58022A65EA5003E9821 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
CODE_SIGN_STYLE = Automatic;
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Debug;
};
0CADC58122A65EA5003E9821 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
CODE_SIGN_STYLE = Automatic;
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
0CADC57322A65EA4003E9821 /* Build configuration list for PBXProject "lzsa" */ = {
isa = XCConfigurationList;
buildConfigurations = (
0CADC57D22A65EA5003E9821 /* Debug */,
0CADC57E22A65EA5003E9821 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
0CADC57F22A65EA5003E9821 /* Build configuration list for PBXNativeTarget "lzsa" */ = {
isa = XCConfigurationList;
buildConfigurations = (
0CADC58022A65EA5003E9821 /* Debug */,
0CADC58122A65EA5003E9821 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 0CADC57022A65EA4003E9821 /* Project object */;
}


@@ -0,0 +1,305 @@
; -----------------------------------------------------------------------------
; Decompress raw LZSA1 block. Create one with lzsa -r <original_file> <compressed_file>
;
; in:
; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
;
; out:
; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
;
; -----------------------------------------------------------------------------
; Backward decompression is also supported, use lzsa -r -b <original_file> <compressed_file>
; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
;
; in:
; * LZSA_SRC_LO/LZSA_SRC_HI must contain the address of the last byte of compressed data
; * LZSA_DST_LO/LZSA_DST_HI must contain the address of the last byte of the destination buffer
;
; out:
; * LZSA_DST_LO/LZSA_DST_HI contain the last decompressed byte address, -1
;
; -----------------------------------------------------------------------------
;
; Copyright (C) 2019 Emmanuel Marty, Peter Ferrie
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; -----------------------------------------------------------------------------
DECOMPRESS_LZSA1_FAST
LDY #$00
DECODE_TOKEN
JSR GETSRC ; read token byte: O|LLL|MMMM
PHA ; preserve token on stack
AND #$70 ; isolate literals count
BEQ NO_LITERALS ; skip if no literals to copy
CMP #$70 ; LITERALS_RUN_LEN?
BNE PREPARE_COPY_LITERALS ; if not, count is directly embedded in token
JSR GETSRC ; get extra byte of variable literals count
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$F9 ; (LITERALS_RUN_LEN)
BCC PREPARE_COPY_LITERALS_DIRECT
BEQ LARGE_VARLEN_LITERALS ; if adding up to zero, go grab 16-bit count
JSR GETSRC ; get single extended byte of variable literals count
INY ; add 256 to literals count
BCS PREPARE_COPY_LITERALS_DIRECT ; (*like JMP PREPARE_COPY_LITERALS_DIRECT but shorter)
LARGE_VARLEN_LITERALS ; handle 16 bits literals count
; literals count = directly these 16 bits
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
TXA
JMP PREPARE_COPY_LARGE_LITERALS
PREPARE_COPY_LITERALS
TAX
LDA SHIFT_TABLE-1,X ; shift literals length into place
; -1 because position 00 is reserved
PREPARE_COPY_LITERALS_DIRECT
TAX
PREPARE_COPY_LARGE_LITERALS
BEQ COPY_LITERALS
INY
COPY_LITERALS
JSR GETPUT ; copy one byte of literals
DEX
BNE COPY_LITERALS
DEY
BNE COPY_LITERALS
NO_LITERALS
PLA ; retrieve token from stack
PHA ; preserve token again
BMI GET_LONG_OFFSET ; $80: 16 bit offset
JSR GETSRC ; get 8 bit offset from stream in A
TAX ; save for later
LDA #$FF ; high 8 bits
BNE GOT_OFFSET ; go prepare match
; (*like JMP GOT_OFFSET but shorter)
SHORT_VARLEN_MATCHLEN
JSR GETSRC ; get single extended byte of variable match len
INY ; add 256 to match length
PREPARE_COPY_MATCH
TAX
PREPARE_COPY_MATCH_Y
TXA
BEQ COPY_MATCH_LOOP
INY
COPY_MATCH_LOOP
LDA $AAAA ; get one byte of backreference
JSR PUTDST ; copy to destination
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- put backreference bytes backward
LDA COPY_MATCH_LOOP+1
BEQ GETMATCH_ADJ_HI
GETMATCH_DONE
DEC COPY_MATCH_LOOP+1
} else {
; Forward decompression -- put backreference bytes forward
INC COPY_MATCH_LOOP+1
BEQ GETMATCH_ADJ_HI
GETMATCH_DONE
}
DEX
BNE COPY_MATCH_LOOP
DEY
BNE COPY_MATCH_LOOP
BEQ DECODE_TOKEN ; (*like JMP DECODE_TOKEN but shorter)
!ifdef BACKWARD_DECOMPRESS {
GETMATCH_ADJ_HI
DEC COPY_MATCH_LOOP+2
JMP GETMATCH_DONE
} else {
GETMATCH_ADJ_HI
INC COPY_MATCH_LOOP+2
JMP GETMATCH_DONE
}
GET_LONG_OFFSET ; handle 16 bit offset:
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
GOT_OFFSET
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression - subtract match offset
STA OFFSHI ; store high 8 bits of offset
STX OFFSLO
SEC ; subtract dest - match offset
LDA PUTDST+1
OFFSLO = *+1
SBC #$AA ; low 8 bits
STA COPY_MATCH_LOOP+1 ; store back reference address
LDA PUTDST+2
OFFSHI = *+1
SBC #$AA ; high 8 bits
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
SEC
} else {
; Forward decompression - add match offset
STA OFFSHI ; store high 8 bits of offset
TXA
CLC ; add dest + match offset
ADC PUTDST+1 ; low 8 bits
STA COPY_MATCH_LOOP+1 ; store back reference address
OFFSHI = *+1
LDA #$AA ; high 8 bits
ADC PUTDST+2
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
}
PLA ; retrieve token from stack again
AND #$0F ; isolate match len (MMMM)
ADC #$02 ; plus carry which is always set by the high ADC
CMP #$12 ; MATCH_RUN_LEN?
BCC PREPARE_COPY_MATCH ; if not, count is directly embedded in token
JSR GETSRC ; get extra byte of variable match length
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$EE ; add MATCH_RUN_LEN and MIN_MATCH_SIZE to match length
BCC PREPARE_COPY_MATCH
BNE SHORT_VARLEN_MATCHLEN
; Handle 16 bits match length
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
; large match length with zero high byte?
BNE PREPARE_COPY_MATCH_Y ; if not, continue
DECOMPRESSION_DONE
RTS
SHIFT_TABLE
!BYTE $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
!BYTE $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01
!BYTE $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02
!BYTE $03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03
!BYTE $04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04
!BYTE $05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05
!BYTE $06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06
!BYTE $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- get and put bytes backward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA
LDA PUTDST+1
BEQ PUTDST_ADJ_HI
DEC PUTDST+1
RTS
PUTDST_ADJ_HI
DEC PUTDST+2
DEC PUTDST+1
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through to grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA
PHA
LDA GETSRC+1
BEQ GETSRC_ADJ_HI
DEC GETSRC+1
PLA
RTS
GETSRC_ADJ_HI
DEC GETSRC+2
DEC GETSRC+1
PLA
RTS
} else {
; Forward decompression -- get and put bytes forward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA
INC PUTDST+1
BEQ PUTDST_ADJ_HI
RTS
PUTDST_ADJ_HI
INC PUTDST+2
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through to grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA
INC GETSRC+1
BEQ GETSRC_ADJ_HI
RTS
GETSRC_ADJ_HI
INC GETSRC+2
RTS
}


@@ -0,0 +1,363 @@
; -----------------------------------------------------------------------------
; Decompress raw LZSA2 block.
; Create one with lzsa -r -f2 <original_file> <compressed_file>
;
; in:
; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
;
; out:
; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
;
; -----------------------------------------------------------------------------
; Backward decompression is also supported, use lzsa -r -b -f2 <original_file> <compressed_file>
; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
;
; in:
; * LZSA_SRC_LO/LZSA_SRC_HI must contain the address of the last byte of compressed data
; * LZSA_DST_LO/LZSA_DST_HI must contain the address of the last byte of the destination buffer
;
; out:
; * LZSA_DST_LO/LZSA_DST_HI contain the last decompressed byte address, -1
;
; -----------------------------------------------------------------------------
;
; Copyright (C) 2019 Emmanuel Marty, Peter Ferrie
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; -----------------------------------------------------------------------------
NIBCOUNT = $FC ; zero-page location for temp offset
DECOMPRESS_LZSA2_FAST
LDY #$00
STY NIBCOUNT
DECODE_TOKEN
JSR GETSRC ; read token byte: XYZ|LL|MMM
PHA ; preserve token on stack
AND #$18 ; isolate literals count (LL)
BEQ NO_LITERALS ; skip if no literals to copy
CMP #$18 ; LITERALS_RUN_LEN_V2?
BCC PREPARE_COPY_LITERALS ; if less, count is directly embedded in token
JSR GETNIBBLE ; get extra literals length nibble
; add nibble to len from token
ADC #$02 ; (LITERALS_RUN_LEN_V2) minus carry
CMP #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
BCC PREPARE_COPY_LITERALS_DIRECT ; if less, literals count is complete
JSR GETSRC ; get extra byte of variable literals count
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$EE ; overflow?
JMP PREPARE_COPY_LITERALS_DIRECT
PREPARE_COPY_LITERALS_LARGE
; handle 16 bits literals count
; literals count = directly these 16 bits
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
BCS PREPARE_COPY_LITERALS_HIGH ; (*same as JMP PREPARE_COPY_LITERALS_HIGH but shorter)
PREPARE_COPY_LITERALS
LSR ; shift literals count into place
LSR
LSR
PREPARE_COPY_LITERALS_DIRECT
TAX
BCS PREPARE_COPY_LITERALS_LARGE ; if so, literals count is large
PREPARE_COPY_LITERALS_HIGH
TXA
BEQ COPY_LITERALS
INY
COPY_LITERALS
JSR GETPUT ; copy one byte of literals
DEX
BNE COPY_LITERALS
DEY
BNE COPY_LITERALS
NO_LITERALS
PLA ; retrieve token from stack
PHA ; preserve token again
ASL
BCS REPMATCH_OR_LARGE_OFFSET ; 1YZ: rep-match or 13/16 bit offset
ASL ; 0YZ: 5 or 9 bit offset
BCS OFFSET_9_BIT
; 00Z: 5 bit offset
LDX #$FF ; set offset bits 15-8 to 1
JSR GETCOMBINEDBITS ; rotate Z bit into bit 0, read nibble for bits 4-1
ORA #$E0 ; set bits 7-5 to 1
BNE GOT_OFFSET_LO ; go store low byte of match offset and prepare match
OFFSET_9_BIT ; 01Z: 9 bit offset
;;ASL ; shift Z (offset bit 8) in place
ROL
ROL
AND #$01
EOR #$FF ; set offset bits 15-9 to 1
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
; (*same as JMP GOT_OFFSET_HI but shorter)
REPMATCH_OR_LARGE_OFFSET
ASL ; 13 bit offset?
BCS REPMATCH_OR_16_BIT ; handle rep-match or 16-bit offset if not
; 10Z: 13 bit offset
JSR GETCOMBINEDBITS ; rotate Z bit into bit 8, read nibble for bits 12-9
ADC #$DE ; set bits 15-13 to 1 and subtract 2 (to subtract 512)
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
; (*same as JMP GOT_OFFSET_HI but shorter)
REPMATCH_OR_16_BIT ; rep-match or 16 bit offset
;;ASL ; XYZ=111?
BMI REP_MATCH ; reuse previous offset if so (rep-match)
; 110: handle 16 bit offset
JSR GETSRC ; grab high 8 bits
GOT_OFFSET_HI
TAX
JSR GETSRC ; grab low 8 bits
GOT_OFFSET_LO
STA OFFSLO ; store low byte of match offset
STX OFFSHI ; store high byte of match offset
REP_MATCH
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression - subtract match offset
SEC ; subtract dest - match offset
LDA PUTDST+1 ; low 8 bits
OFFSLO = *+1
SBC #$AA
STA COPY_MATCH_LOOP+1 ; store back reference address
LDA PUTDST+2
OFFSHI = *+1
SBC #$AA ; high 8 bits
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
SEC
} else {
; Forward decompression - add match offset
CLC ; add dest + match offset
LDA PUTDST+1 ; low 8 bits
OFFSLO = *+1
ADC #$AA
STA COPY_MATCH_LOOP+1 ; store back reference address
OFFSHI = *+1
LDA #$AA ; high 8 bits
ADC PUTDST+2
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
}
PLA ; retrieve token from stack again
AND #$07 ; isolate match len (MMM)
ADC #$01 ; add MIN_MATCH_SIZE_V2 and carry
CMP #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
BCC PREPARE_COPY_MATCH ; if less, length is directly embedded in token
JSR GETNIBBLE ; get extra match length nibble
; add nibble to len from token
ADC #$08 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2) minus carry
CMP #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
BCC PREPARE_COPY_MATCH ; if less, match length is complete
JSR GETSRC ; get extra byte of variable match length
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$E8 ; overflow?
PREPARE_COPY_MATCH
TAX
BCC PREPARE_COPY_MATCH_Y ; if not, the match length is complete
BEQ DECOMPRESSION_DONE ; if EOD code, bail
; Handle 16 bits match length
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
PREPARE_COPY_MATCH_Y
TXA
BEQ COPY_MATCH_LOOP
INY
COPY_MATCH_LOOP
LDA $AAAA ; get one byte of backreference
JSR PUTDST ; copy to destination
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- put backreference bytes backward
LDA COPY_MATCH_LOOP+1
BEQ GETMATCH_ADJ_HI
GETMATCH_DONE
DEC COPY_MATCH_LOOP+1
} else {
; Forward decompression -- put backreference bytes forward
INC COPY_MATCH_LOOP+1
BEQ GETMATCH_ADJ_HI
GETMATCH_DONE
}
DEX
BNE COPY_MATCH_LOOP
DEY
BNE COPY_MATCH_LOOP
JMP DECODE_TOKEN
!ifdef BACKWARD_DECOMPRESS {
GETMATCH_ADJ_HI
DEC COPY_MATCH_LOOP+2
JMP GETMATCH_DONE
} else {
GETMATCH_ADJ_HI
INC COPY_MATCH_LOOP+2
JMP GETMATCH_DONE
}
GETCOMBINEDBITS
EOR #$80
ASL
PHP
JSR GETNIBBLE ; get nibble into bits 0-3 (for offset bits 1-4)
PLP ; merge Z bit as the carry bit (for offset bit 0)
COMBINEDBITZ
ROL ; nibble -> bits 1-4; carry(!Z bit) -> bit 0 ; carry cleared
DECOMPRESSION_DONE
RTS
GETNIBBLE
NIBBLES = *+1
LDA #$AA
LSR NIBCOUNT
BCC NEED_NIBBLES
AND #$0F ; isolate low 4 bits of nibble
RTS
NEED_NIBBLES
INC NIBCOUNT
JSR GETSRC ; get 2 nibbles
STA NIBBLES
LSR
LSR
LSR
LSR
SEC
RTS
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- get and put bytes backward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA
LDA PUTDST+1
BEQ PUTDST_ADJ_HI
DEC PUTDST+1
RTS
PUTDST_ADJ_HI
DEC PUTDST+2
DEC PUTDST+1
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through to grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA
PHA
LDA GETSRC+1
BEQ GETSRC_ADJ_HI
DEC GETSRC+1
PLA
RTS
GETSRC_ADJ_HI
DEC GETSRC+2
DEC GETSRC+1
PLA
RTS
} else {
; Forward decompression -- get and put bytes forward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA
INC PUTDST+1
BEQ PUTDST_ADJ_HI
RTS
PUTDST_ADJ_HI
INC PUTDST+2
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through to grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA
INC GETSRC+1
BEQ GETSRC_ADJ_HI
RTS
GETSRC_ADJ_HI
INC GETSRC+2
RTS
}


@@ -0,0 +1,353 @@
; ***************************************************************************
; ***************************************************************************
;
; lzsa1_6502.s
;
; NMOS 6502 decompressor for data stored in Emmanuel Marty's LZSA1 format.
;
; This code is written for the ACME assembler.
;
; Optional code is presented for one minor 6502 optimization that breaks
; compatibility with the current LZSA1 format standard.
;
; The code is 168 bytes for the small version, and 205 bytes for the normal.
;
; Copyright John Brandwood 2019.
;
; Distributed under the Boost Software License, Version 1.0.
; (See accompanying file LICENSE_1_0.txt or copy at
; http://www.boost.org/LICENSE_1_0.txt)
;
; ***************************************************************************
; ***************************************************************************
; ***************************************************************************
; ***************************************************************************
;
; Decompression Options & Macros
;
;
; Choose size over speed (within sane limits)?
;
LZSA_SMALL_SIZE = 0
;
; Remove code inlining to save space?
;
; This saves 15 bytes of code at the cost of 7% speed.
;
!if LZSA_SMALL_SIZE {
LZSA_NO_INLINE = 1
} else {
LZSA_NO_INLINE = 0
}
;
; Use smaller code for copying literals?
;
; This saves 11 bytes of code at the cost of 15% speed.
;
!if LZSA_SMALL_SIZE {
LZSA_SHORT_CP = 1
} else {
LZSA_SHORT_CP = 0
}
;
; Use smaller code for copying matches?
;
; This saves 11 bytes of code at the cost of 30% speed.
;
!if LZSA_SMALL_SIZE {
LZSA_SHORT_LZ = 1
} else {
LZSA_SHORT_LZ = 0
}
;
; Macro to increment the source pointer to the next page.
;
; This should call a subroutine to determine if a bank
; has been crossed, and a new bank should be paged in.
;
!macro LZSA_INC_PAGE {
inc <lzsa_srcptr + 1
}
;
; Macro to read a byte from the compressed source data.
;
!if LZSA_NO_INLINE {
!macro LZSA_GET_SRC {
jsr lzsa1_get_byte
}
} else {
!macro LZSA_GET_SRC {
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
bne .skip
+LZSA_INC_PAGE
.skip:
}
}
; ***************************************************************************
; ***************************************************************************
;
; Data usage is last 8 bytes of zero-page.
;
!if (LZSA_SHORT_CP | LZSA_SHORT_LZ) {
lzsa_length = $F8 ; 1 byte.
}
lzsa_cmdbuf = $F9 ; 1 byte.
lzsa_winptr = $FA ; 1 word.
lzsa_srcptr = $FC ; 1 word.
lzsa_dstptr = $FE ; 1 word.
LZSA_SRC_LO = $FC
LZSA_SRC_HI = $FD
LZSA_DST_LO = $FE
LZSA_DST_HI = $FF
; ***************************************************************************
; ***************************************************************************
;
; lzsa1_unpack - Decompress data stored in Emmanuel Marty's LZSA1 format.
;
; Args: lzsa_srcptr = ptr to compressed data
; Args: lzsa_dstptr = ptr to output buffer
; Uses: lots!
;
DECOMPRESS_LZSA1_FAST:
lzsa1_unpack: ldy #0 ; Initialize source index.
ldx #0 ; Initialize hi-byte of length.
;
; Copy bytes from compressed source data.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
.cp_length: +LZSA_GET_SRC
sta <lzsa_cmdbuf ; Preserve this for later.
and #$70 ; Extract literal length.
beq .lz_offset ; Skip directly to match?
lsr ; Get 3-bit literal length.
lsr
lsr
lsr
cmp #$07 ; Extended length?
bne .got_cp_len
jsr .get_length ; CS from CMP, X=0.
!if LZSA_SHORT_CP {
.got_cp_len: cmp #0 ; Check the lo-byte of length.
beq .put_cp_len
inx ; Increment # of pages to copy.
.put_cp_len: stx <lzsa_length
tax
.cp_page: lda (lzsa_srcptr),y
sta (lzsa_dstptr),y
inc <lzsa_srcptr + 0
bne .skip1
inc <lzsa_srcptr + 1
.skip1: inc <lzsa_dstptr + 0
bne .skip2
inc <lzsa_dstptr + 1
.skip2: dex
bne .cp_page
dec <lzsa_length ; Any full pages left to copy?
bne .cp_page
} else {
.got_cp_len: tay ; Check the lo-byte of length.
beq .cp_page
inx ; Increment # of pages to copy.
.get_cp_src: clc ; Calc address of partial page.
adc <lzsa_srcptr + 0
sta <lzsa_srcptr + 0
bcs .get_cp_dst
dec <lzsa_srcptr + 1
.get_cp_dst: tya
clc ; Calc address of partial page.
adc <lzsa_dstptr + 0
sta <lzsa_dstptr + 0
bcs .get_cp_idx
dec <lzsa_dstptr + 1
.get_cp_idx: tya ; Negate the lo-byte of length.
eor #$FF
tay
iny
.cp_page: lda (lzsa_srcptr),y
sta (lzsa_dstptr),y
iny
bne .cp_page
inc <lzsa_srcptr + 1
inc <lzsa_dstptr + 1
dex ; Any full pages left to copy?
bne .cp_page
}
;
; Copy bytes from decompressed window.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
.lz_offset: +LZSA_GET_SRC
clc
adc <lzsa_dstptr + 0
sta <lzsa_winptr + 0
lda #$FF
bit <lzsa_cmdbuf
bpl .hi_offset
+LZSA_GET_SRC
.hi_offset: adc <lzsa_dstptr + 1
sta <lzsa_winptr + 1
.lz_length: lda <lzsa_cmdbuf ; X=0 from previous loop.
and #$0F
adc #$03 - 1 ; CS from previous ADC.
cmp #$12 ; Extended length?
bne .got_lz_len
jsr .get_length ; CS from CMP, X=0.
!if LZSA_SHORT_LZ {
.got_lz_len: cmp #0 ; Check the lo-byte of length.
beq .put_lz_len
inx ; Increment # of pages to copy.
.put_lz_len: stx <lzsa_length
tax
.lz_page: lda (lzsa_winptr),y
sta (lzsa_dstptr),y
inc <lzsa_winptr + 0
bne .skip3
inc <lzsa_winptr + 1
.skip3: inc <lzsa_dstptr + 0
bne .skip4
inc <lzsa_dstptr + 1
.skip4: dex
bne .lz_page
dec <lzsa_length ; Any full pages left to copy?
bne .lz_page
jmp .cp_length ; Loop around to the beginning.
} else {
.got_lz_len: tay ; Check the lo-byte of length.
beq .lz_page
inx ; Increment # of pages to copy.
.get_lz_win: clc ; Calc address of partial page.
adc <lzsa_winptr + 0
sta <lzsa_winptr + 0
bcs .get_lz_dst
dec <lzsa_winptr + 1
.get_lz_dst: tya
clc ; Calc address of partial page.
adc <lzsa_dstptr + 0
sta <lzsa_dstptr + 0
bcs .get_lz_idx
dec <lzsa_dstptr + 1
.get_lz_idx: tya ; Negate the lo-byte of length.
eor #$FF
tay
iny
.lz_page: lda (lzsa_winptr),y
sta (lzsa_dstptr),y
iny
bne .lz_page
inc <lzsa_winptr + 1
inc <lzsa_dstptr + 1
dex ; Any full pages left to copy?
bne .lz_page
jmp .cp_length ; Loop around to the beginning.
}
;
; Get 16-bit length in X:A register pair.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
.get_length: clc ; Add on the next byte to get
adc (lzsa_srcptr),y ; the length.
inc <lzsa_srcptr + 0
bne .skip_inc
+LZSA_INC_PAGE
.skip_inc: bcc .got_length ; No overflow means done.
cmp #$00 ; Overflow to 256 or 257?
beq .extra_word
.extra_byte: inx
jmp lzsa1_get_byte ; So rare, this can be slow!
.extra_word: jsr lzsa1_get_byte ; So rare, this can be slow!
pha
jsr lzsa1_get_byte ; So rare, this can be slow!
tax
beq .finished ; Length-hi == 0 at EOF.
pla ; Length-lo.
rts
lzsa1_get_byte:
lda (lzsa_srcptr),y ; Subroutine version for when
inc <lzsa_srcptr + 0 ; inlining isn't advantageous.
beq lzsa1_next_page
.got_length: rts
lzsa1_next_page:
inc <lzsa_srcptr + 1 ; Inc & test for bank overflow.
rts
.finished: pla ; Length-lo.
pla ; Decompression completed, pop
pla ; return address.
rts
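
For reference while reading the 6502 code above, here is a minimal C sketch of the LZSA1 raw-block decode loop it implements: token O|LLL|MMMM, optional extended literal and match lengths, and a negative match offset whose high byte is $FF when the O bit is clear. The function and variable names are illustrative only, there is no bounds checking, and end-of-data handling is simplified to the zero 16-bit match length check used by the routines in this repository.

#include <stddef.h>
#include <stdint.h>

/* Illustrative sketch of LZSA1 raw-block decoding -- not the shipped code. */
static size_t lzsa1_decode_sketch(const uint8_t *src, uint8_t *dst)
{
    uint8_t *out = dst;
    for (;;) {
        uint8_t token = *src++;                    /* O|LLL|MMMM */

        /* Literals: LLL = 0..6 embedded, 7 means an extra byte follows. */
        size_t lits = (token >> 4) & 0x07;
        if (lits == 7) {
            uint8_t b = *src++;
            if (b == 249) {                        /* 16-bit count, little endian */
                lits = (size_t)src[0] | ((size_t)src[1] << 8);
                src += 2;
            } else if (b == 250) {                 /* 256 + next byte */
                lits = 256u + *src++;
            } else {
                lits = 7u + b;                     /* 0..248 added to 7 */
            }
        }
        while (lits--)
            *out++ = *src++;

        /* Match offset: low byte always, high byte only when O (bit 7) is set. */
        uint16_t off_lo = *src++;
        uint16_t off_hi = (token & 0x80) ? *src++ : 0xFF;
        int16_t offset = (int16_t)((off_hi << 8) | off_lo);   /* negative */

        /* Match length: MMMM + MIN_MATCH_SIZE (3); 18 means an extra byte follows. */
        size_t mlen = (size_t)(token & 0x0F) + 3;
        if (mlen == 18) {
            uint8_t b = *src++;
            if (b == 238) {                        /* 16-bit length, little endian */
                mlen = (size_t)src[0] | ((size_t)src[1] << 8);
                src += 2;
                if (mlen == 0)                     /* end-of-data marker */
                    return (size_t)(out - dst);
            } else if (b == 239) {                 /* 256 + next byte */
                mlen = 256u + *src++;
            } else {
                mlen = 18u + b;                    /* 0..237 added */
            }
        }
        while (mlen--) {                           /* byte-by-byte, overlap-safe */
            *out = out[offset];
            out++;
        }
    }
}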

@ -0,0 +1,522 @@
; ***************************************************************************
; ***************************************************************************
;
; lzsa2_6502.s
;
; NMOS 6502 decompressor for data stored in Emmanuel Marty's LZSA2 format.
;
; This code is written for the ACME assembler.
;
; Optional code is presented for two minor 6502 optimizations that break
; compatibility with the current LZSA2 format standard.
;
; The code is 241 bytes for the small version, and 267 bytes for the normal.
;
; Copyright John Brandwood 2019.
;
; Distributed under the Boost Software License, Version 1.0.
; (See accompanying file LICENSE_1_0.txt or copy at
; http://www.boost.org/LICENSE_1_0.txt)
;
; ***************************************************************************
; ***************************************************************************
; ***************************************************************************
; ***************************************************************************
;
; Decompression Options & Macros
;
;
; Choose size over speed (within sane limits)?
;
LZSA_SMALL_SIZE = 0
;
; Remove code inlining to save space?
;
; This saves 15 bytes of code at the cost of 7% speed.
;
!if LZSA_SMALL_SIZE {
LZSA_NO_INLINE = 1
} else {
LZSA_NO_INLINE = 0
}
;
; Use smaller code for copying literals?
;
; This saves 11 bytes of code at the cost of 5% speed.
;
!if LZSA_SMALL_SIZE {
LZSA_SHORT_CP = 1
} else {
LZSA_SHORT_CP = 0
}
;
; We will read from or write to $FFFF. This prevents the
; use of the "INC ptrhi / BNE" trick and reduces speed.
;
LZSA_USE_FFFF = 0
;
; Macro to increment the source pointer to the next page.
;
!macro LZSA_INC_PAGE {
inc <lzsa_srcptr + 1
}
;
; Macro to read a byte from the compressed source data.
;
!if LZSA_NO_INLINE {
!macro LZSA_GET_SRC {
jsr lzsa2_get_byte
}
} else {
!macro LZSA_GET_SRC {
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
bne .skip
+LZSA_INC_PAGE
.skip:
}
}
;
; Macro to speed up reading 50% of nibbles.
;
; This seems to save very few cycles compared to the
; increase in code size, and it isn't recommended.
;
LZSA_SLOW_NIBL = 1
!if (LZSA_SLOW_NIBL + LZSA_SMALL_SIZE) {
!macro LZSA_GET_NIBL {
jsr lzsa2_get_nibble ; Always call a function.
}
} else {
!macro LZSA_GET_NIBL {
lsr <lzsa_nibflg ; Is there a nibble waiting?
lda <lzsa_nibble ; Extract the lo-nibble.
bcs .skip
jsr lzsa2_new_nibble ; Extract the hi-nibble.
.skip: ora #$F0
}
}
; ***************************************************************************
; ***************************************************************************
;
; Data usage is last 11 bytes of zero-page.
;
lzsa_cmdbuf = $F5 ; 1 byte.
lzsa_nibflg = $F6 ; 1 byte.
lzsa_nibble = $F7 ; 1 byte.
lzsa_offset = $F8 ; 1 word.
lzsa_winptr = $FA ; 1 word.
lzsa_srcptr = $FC ; 1 word.
lzsa_dstptr = $FE ; 1 word.
lzsa_length = lzsa_winptr ; 1 word.
LZSA_SRC_LO = $FC
LZSA_SRC_HI = $FD
LZSA_DST_LO = $FE
LZSA_DST_HI = $FF
; ***************************************************************************
; ***************************************************************************
;
; lzsa2_unpack - Decompress data stored in Emmanuel Marty's LZSA2 format.
;
; Args: lzsa_srcptr = ptr to compressed data
; Args: lzsa_dstptr = ptr to output buffer
; Uses: lots!
;
DECOMPRESS_LZSA2_FAST:
lzsa2_unpack: ldy #0 ; Initialize source index.
sty <lzsa_nibflg ; Initialize nibble buffer.
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
beq .cp_length ; always taken
.incsrc1:
inc <lzsa_srcptr + 1
bne .resume_src1 ; always taken
!if LZSA_SHORT_CP {
.incsrc2:
inc <lzsa_srcptr + 1
bne .resume_src2 ; always taken
.incdst:
inc <lzsa_dstptr + 1
bne .resume_dst ; always taken
}
}
;
; Copy bytes from compressed source data.
;
.cp_length: ldx #$00 ; Hi-byte of length or offset.
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
+LZSA_GET_SRC
} else {
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
beq .incsrc1
}
.resume_src1:
sta <lzsa_cmdbuf ; Preserve this for later.
and #$18 ; Extract literal length.
beq .lz_offset ; Skip directly to match?
lsr ; Get 2-bit literal length.
lsr
lsr
cmp #$03 ; Extended length?
bne .got_cp_len
jsr .get_length ; X=0 table index for literals.
!if LZSA_SHORT_CP {
.got_cp_len: cmp #0 ; Check the lo-byte of length.
beq .put_cp_len
inx ; Increment # of pages to copy.
.put_cp_len: stx <lzsa_length
tax
.cp_page: lda (lzsa_srcptr),y
sta (lzsa_dstptr),y
inc <lzsa_srcptr + 0
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
bne .skip1
inc <lzsa_srcptr + 1
.skip1: inc <lzsa_dstptr + 0
bne .skip2
inc <lzsa_dstptr + 1
.skip2:
} else {
beq .incsrc2
.resume_src2:
inc <lzsa_dstptr + 0
beq .incdst
.resume_dst:
}
dex
bne .cp_page
dec <lzsa_length ; Any full pages left to copy?
bne .cp_page
} else {
.got_cp_len: tay ; Check the lo-byte of length.
beq .cp_page
inx ; Increment # of pages to copy.
.get_cp_src: clc ; Calc address of partial page.
adc <lzsa_srcptr + 0
sta <lzsa_srcptr + 0
bcs .get_cp_dst
dec <lzsa_srcptr + 1
.get_cp_dst: tya
clc ; Calc address of partial page.
adc <lzsa_dstptr + 0
sta <lzsa_dstptr + 0
bcs .get_cp_idx
dec <lzsa_dstptr + 1
.get_cp_idx: tya ; Negate the lo-byte of length.
eor #$FF
tay
iny
.cp_page: lda (lzsa_srcptr),y
sta (lzsa_dstptr),y
iny
bne .cp_page
inc <lzsa_srcptr + 1
inc <lzsa_dstptr + 1
dex ; Any full pages left to copy?
bne .cp_page
}
; ================================
; xyz
; 00z 5-bit offset
; 01z 9-bit offset
; 10z 13-bit offset
; 110 16-bit offset
; 111 repeat offset
.lz_offset: lda <lzsa_cmdbuf
asl
bcs .get_13_16_rep
asl
bcs .get_9_bits
.get_5_bits: dex ; X=$FF
.get_13_bits: asl
php
+LZSA_GET_NIBL ; Always returns with CS.
plp
rol ; Shift into position, set C.
eor #$01
cpx #$00 ; X=$FF for a 5-bit offset.
bne .set_offset
sbc #2 ; Subtract 512 because 13-bit
; offset starts at $FE00.
bne .get_low8x ; Always NZ from previous SBC.
.get_9_bits: dex ; X=$FF if CS, X=$FE if CC.
asl
bcc .get_low8
dex
bcs .get_low8 ; Always CS from previous ASL.
.get_13_16_rep: asl
bcc .get_13_bits ; Shares code with 5-bit path.
.get_16_rep: bmi .lz_length ; Repeat previous offset.
;
; Copy bytes from decompressed window.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
.get_16_bits: jsr lzsa2_get_byte ; Get hi-byte of offset.
.get_low8x: tax
.get_low8:
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
+LZSA_GET_SRC ; Get lo-byte of offset.
} else {
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
beq .incsrc3
.resume_src3:
}
.set_offset: stx <lzsa_offset + 1 ; Save new offset.
sta <lzsa_offset + 0
.lz_length: ldx #$00 ; Hi-byte of length.
lda <lzsa_cmdbuf
and #$07
clc
adc #$02
cmp #$09 ; Extended length?
bne .got_lz_len
inx
jsr .get_length ; X=1 table index for match.
.got_lz_len: eor #$FF ; Negate the lo-byte of length
tay ; and check for zero.
iny
beq .calc_lz_addr
eor #$FF
inx ; Increment # of pages to copy.
clc ; Calc destination for partial
adc <lzsa_dstptr + 0 ; page.
sta <lzsa_dstptr + 0
bcs .calc_lz_addr
dec <lzsa_dstptr + 1
.calc_lz_addr: clc ; Calc address of match.
lda <lzsa_dstptr + 0 ; N.B. Offset is negative!
adc <lzsa_offset + 0
sta <lzsa_winptr + 0
lda <lzsa_dstptr + 1
adc <lzsa_offset + 1
sta <lzsa_winptr + 1
.lz_page: lda (lzsa_winptr),y
sta (lzsa_dstptr),y
iny
bne .lz_page
inc <lzsa_winptr + 1
inc <lzsa_dstptr + 1
dex ; Any full pages left to copy?
bne .lz_page
jmp .cp_length ; Loop around to the beginning.
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
.incsrc3:
inc <lzsa_srcptr + 1
bne .resume_src3 ; always taken
}
;
; Lookup tables to differentiate literal and match lengths.
;
.nibl_len_tbl: !byte 3 + $10 ; 0+3 (for literal).
!byte 9 + $10 ; 2+7 (for match).
.byte_len_tbl: !byte 18 - 1 ; 0+3+15 - CS (for literal).
!byte 24 - 1 ; 2+7+15 - CS (for match).
;
; Get 16-bit length in X:A register pair.
;
.get_length: +LZSA_GET_NIBL
cmp #$FF ; Extended length?
bcs .byte_length
adc .nibl_len_tbl,x ; Always CC from previous CMP.
.got_length: ldx #$00 ; Set hi-byte of 4 & 8 bit
rts ; lengths.
.byte_length: jsr lzsa2_get_byte ; So rare, this can be slow!
adc .byte_len_tbl,x ; Always CS from previous CMP.
bcc .got_length
beq .finished
.word_length: jsr lzsa2_get_byte ; So rare, this can be slow!
pha
jsr lzsa2_get_byte ; So rare, this can be slow!
tax
pla
rts
lzsa2_get_byte:
lda (lzsa_srcptr),y ; Subroutine version for when
inc <lzsa_srcptr + 0 ; inlining isn't advantageous.
beq lzsa2_next_page
rts
lzsa2_next_page:
inc <lzsa_srcptr + 1 ; Inc & test for bank overflow.
rts
.finished: pla ; Decompression completed, pop
pla ; return address.
rts
;
; Get a nibble value from compressed data in A.
;
!if (LZSA_SLOW_NIBL | LZSA_SMALL_SIZE) {
lzsa2_get_nibble:
lsr <lzsa_nibflg ; Is there a nibble waiting?
lda <lzsa_nibble ; Extract the lo-nibble.
bcs .got_nibble
inc <lzsa_nibflg ; Reset the flag.
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
+LZSA_GET_SRC
} else {
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
beq .incsrc4
.resume_src4:
}
sta <lzsa_nibble ; Preserve for next time.
lsr ; Extract the hi-nibble.
lsr
lsr
lsr
.got_nibble: ora #$F0
rts
} else {
lzsa2_new_nibble:
inc <lzsa_nibflg ; Reset the flag.
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) {
+LZSA_GET_SRC
} else {
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
beq .incsrc4
.resume_src4:
}
sta <lzsa_nibble ; Preserve for next time.
lsr ; Extract the hi-nibble.
lsr
lsr
lsr
rts
}
!if (LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
.incsrc4:
inc <lzsa_srcptr + 1
bne .resume_src4 ; always taken
}
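
A note on the nibble stream used by the LZSA2 routines: nibbles are packed two per byte, the high nibble is consumed first, and a one-bit flag (lzsa_nibflg above, NIBCOUNT and lz2nibct in the other ports) remembers whether a buffered low nibble is still pending. Below is a small C sketch with illustrative names; the 6502 routines above additionally OR the result with $F0 before returning, which the sketch omits.

#include <stdint.h>

/* Sketch of the LZSA2 nibble reader; purely illustrative. */
typedef struct {
    const uint8_t *src;     /* compressed stream position           */
    uint8_t buffered;       /* byte holding the pending low nibble  */
    uint8_t have_nibble;    /* non-zero if the low nibble is unused */
} nibble_reader;

static uint8_t get_nibble(nibble_reader *r)
{
    if (r->have_nibble) {            /* use the buffered low nibble */
        r->have_nibble = 0;
        return r->buffered & 0x0F;
    }
    r->buffered = *r->src++;         /* fetch two fresh nibbles */
    r->have_nibble = 1;
    return r->buffered >> 4;         /* high nibble is consumed first */
}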

@ -0,0 +1,270 @@
; -----------------------------------------------------------------------------
; Decompress raw LZSA1 block. Create one with lzsa -r <original_file> <compressed_file>
;
; in:
; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
;
; out:
; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
;
; -----------------------------------------------------------------------------
; Backward decompression is also supported, use lzsa -r -b <original_file> <compressed_file>
; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
;
; in:
; * LZSA_SRC_LO/LZSA_SRC_HI must contain the address of the last byte of compressed data
; * LZSA_DST_LO/LZSA_DST_HI must contain the address of the last byte of the destination buffer
;
; out:
; * LZSA_DST_LO/LZSA_DST_HI contain the last decompressed byte address, -1
;
; -----------------------------------------------------------------------------
;
; Copyright (C) 2019 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; -----------------------------------------------------------------------------
DECOMPRESS_LZSA1
LDY #$00
DECODE_TOKEN
JSR GETSRC ; read token byte: O|LLL|MMMM
PHA ; preserve token on stack
AND #$70 ; isolate literals count
BEQ NO_LITERALS ; skip if no literals to copy
LSR ; shift literals count into place
LSR
LSR
LSR
CMP #$07 ; LITERALS_RUN_LEN?
BCC PREPARE_COPY_LITERALS ; if not, count is directly embedded in token
JSR GETSRC ; get extra byte of variable literals count
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$F9 ; (LITERALS_RUN_LEN)
BCC PREPARE_COPY_LITERALS
BEQ LARGE_VARLEN_LITERALS ; if adding up to zero, go grab 16-bit count
JSR GETSRC ; get single extended byte of variable literals count
INY ; add 256 to literals count
BCS PREPARE_COPY_LITERALS ; (*like JMP PREPARE_COPY_LITERALS but shorter)
LARGE_VARLEN_LITERALS ; handle 16 bits literals count
; literals count = directly these 16 bits
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
TXA
PREPARE_COPY_LITERALS
TAX
BEQ COPY_LITERALS
INY
COPY_LITERALS
JSR GETPUT ; copy one byte of literals
DEX
BNE COPY_LITERALS
DEY
BNE COPY_LITERALS
NO_LITERALS
PLA ; retrieve token from stack
PHA ; preserve token again
BMI GET_LONG_OFFSET ; $80: 16 bit offset
JSR GETSRC ; get 8 bit offset from stream in A
TAX ; save for later
LDA #$FF ; high 8 bits
BNE GOT_OFFSET ; go prepare match
; (*like JMP GOT_OFFSET but shorter)
SHORT_VARLEN_MATCHLEN
JSR GETSRC ; get single extended byte of variable match len
INY ; add 256 to match length
PREPARE_COPY_MATCH
TAX
PREPARE_COPY_MATCH_Y
TXA
BEQ COPY_MATCH_LOOP
INY
COPY_MATCH_LOOP
LDA $AAAA ; get one byte of backreference
JSR PUTDST ; copy to destination
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- put backreference bytes backward
LDA COPY_MATCH_LOOP+1
BNE GETMATCH_DONE
DEC COPY_MATCH_LOOP+2
GETMATCH_DONE
DEC COPY_MATCH_LOOP+1
} else {
; Forward decompression -- put backreference bytes forward
INC COPY_MATCH_LOOP+1
BNE GETMATCH_DONE
INC COPY_MATCH_LOOP+2
GETMATCH_DONE
}
DEX
BNE COPY_MATCH_LOOP
DEY
BNE COPY_MATCH_LOOP
BEQ DECODE_TOKEN ; (*like JMP DECODE_TOKEN but shorter)
GET_LONG_OFFSET ; handle 16 bit offset:
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
GOT_OFFSET
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression - subtract match offset
STA OFFSHI ; store high 8 bits of offset
STX OFFSLO
SEC ; subtract: dest - match offset
LDA PUTDST+1
OFFSLO = *+1
SBC #$AA ; low 8 bits
STA COPY_MATCH_LOOP+1 ; store back reference address
LDA PUTDST+2
OFFSHI = *+1
SBC #$AA ; high 8 bits
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
SEC
} else {
; Forward decompression - add match offset
STA OFFSHI ; store high 8 bits of offset
TXA
CLC ; add dest + match offset
ADC PUTDST+1 ; low 8 bits
STA COPY_MATCH_LOOP+1 ; store back reference address
OFFSHI = *+1
LDA #$AA ; high 8 bits
ADC PUTDST+2
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
}
PLA ; retrieve token from stack again
AND #$0F ; isolate match len (MMMM)
ADC #$02 ; plus carry which is always set by the high ADC
CMP #$12 ; MATCH_RUN_LEN?
BCC PREPARE_COPY_MATCH ; if not, count is directly embedded in token
JSR GETSRC ; get extra byte of variable match length
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$EE ; add MATCH_RUN_LEN and MIN_MATCH_SIZE to match length
BCC PREPARE_COPY_MATCH
BNE SHORT_VARLEN_MATCHLEN
; Handle 16 bits match length
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
; large match length with zero high byte?
BNE PREPARE_COPY_MATCH_Y ; if not, continue
DECOMPRESSION_DONE
RTS
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- get and put bytes backward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA
LDA PUTDST+1
BNE PUTDST_DONE
DEC PUTDST+2
PUTDST_DONE
DEC PUTDST+1
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA
PHA
LDA GETSRC+1
BNE GETSRC_DONE
DEC GETSRC+2
GETSRC_DONE
DEC GETSRC+1
PLA
RTS
} else {
; Forward decompression -- get and put bytes forward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA
INC PUTDST+1
BNE PUTDST_DONE
INC PUTDST+2
PUTDST_DONE
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA
INC GETSRC+1
BNE GETSRC_DONE
INC GETSRC+2
GETSRC_DONE
RTS
}
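
The GETSRC and PUTDST routines above patch their own operand bytes to walk through memory. The forward build increments the low operand byte and carries into the high byte when it wraps to zero; the BACKWARD_DECOMPRESS build borrows from the high byte whenever the low byte is about to wrap the other way. A minimal C sketch of the two stepping rules, with the 16-bit address split into the same low/high bytes (names illustrative):

#include <stdint.h>

/* Sketch of the self-modified pointer stepping; illustrative only. */
typedef struct { uint8_t lo, hi; } ptr16;

static void step_forward(ptr16 *p)      /* forward decompression */
{
    if (++p->lo == 0)                   /* low byte wrapped $FF -> $00 */
        p->hi++;                        /* so carry into the high byte */
}

static void step_backward(ptr16 *p)     /* BACKWARD_DECOMPRESS */
{
    if (p->lo == 0)                     /* about to wrap $00 -> $FF */
        p->hi--;                        /* so borrow from the high byte */
    p->lo--;
}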

@ -0,0 +1,336 @@
; -----------------------------------------------------------------------------
; Decompress raw LZSA2 block.
; Create one with lzsa -r -f2 <original_file> <compressed_file>
;
; in:
; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
;
; out:
; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
;
; -----------------------------------------------------------------------------
; Backward decompression is also supported, use lzsa -r -b -f2 <original_file> <compressed_file>
; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
;
; in:
; * LZSA_SRC_LO/LZSA_SRC_HI must contain the address of the last byte of compressed data
; * LZSA_DST_LO/LZSA_DST_HI must contain the address of the last byte of the destination buffer
;
; out:
; * LZSA_DST_LO/LZSA_DST_HI contain the last decompressed byte address, -1
;
; -----------------------------------------------------------------------------
;
; Copyright (C) 2019 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; -----------------------------------------------------------------------------
NIBCOUNT = $FC ; zero-page location for the nibble counter
DECOMPRESS_LZSA2
LDY #$00
STY NIBCOUNT
DECODE_TOKEN
JSR GETSRC ; read token byte: XYZ|LL|MMM
PHA ; preserve token on stack
AND #$18 ; isolate literals count (LL)
BEQ NO_LITERALS ; skip if no literals to copy
LSR ; shift literals count into place
LSR
LSR
CMP #$03 ; LITERALS_RUN_LEN_V2?
BCC PREPARE_COPY_LITERALS ; if less, count is directly embedded in token
JSR GETNIBBLE ; get extra literals length nibble
; add nibble to len from token
ADC #$02 ; (LITERALS_RUN_LEN_V2) minus carry
CMP #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
BCC PREPARE_COPY_LITERALS ; if less, literals count is complete
JSR GETSRC ; get extra byte of variable literals count
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$EE ; overflow?
PREPARE_COPY_LITERALS
TAX
BCC PREPARE_COPY_LITERALS_HIGH ; if not, literals count is complete
; handle 16 bits literals count
; literals count = directly these 16 bits
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
PREPARE_COPY_LITERALS_HIGH
TXA
BEQ COPY_LITERALS
INY
COPY_LITERALS
JSR GETPUT ; copy one byte of literals
DEX
BNE COPY_LITERALS
DEY
BNE COPY_LITERALS
NO_LITERALS
PLA ; retrieve token from stack
PHA ; preserve token again
ASL
BCS REPMATCH_OR_LARGE_OFFSET ; 1YZ: rep-match or 13/16 bit offset
ASL ; 0YZ: 5 or 9 bit offset
BCS OFFSET_9_BIT
; 00Z: 5 bit offset
LDX #$FF ; set offset bits 15-8 to 1
JSR GETCOMBINEDBITS ; rotate Z bit into bit 0, read nibble for bits 4-1
ORA #$E0 ; set bits 7-5 to 1
BNE GOT_OFFSET_LO ; go store low byte of match offset and prepare match
OFFSET_9_BIT ; 01Z: 9 bit offset
;;ASL ; shift Z (offset bit 8) in place
ROL
ROL
AND #$01
EOR #$FF ; set offset bits 15-9 to 1
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
; (*same as JMP GOT_OFFSET_HI but shorter)
REPMATCH_OR_LARGE_OFFSET
ASL ; 13 bit offset?
BCS REPMATCH_OR_16_BIT ; handle rep-match or 16-bit offset if not
; 10Z: 13 bit offset
JSR GETCOMBINEDBITS ; rotate Z bit into bit 8, read nibble for bits 12-9
ADC #$DE ; set bits 15-13 to 1 and subtract 2 (to subtract 512)
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
; (*same as JMP GOT_OFFSET_HI but shorter)
REPMATCH_OR_16_BIT ; rep-match or 16 bit offset
;;ASL ; XYZ=111?
BMI REP_MATCH ; reuse previous offset if so (rep-match)
; 110: handle 16 bit offset
JSR GETSRC ; grab high 8 bits
GOT_OFFSET_HI
TAX
JSR GETSRC ; grab low 8 bits
GOT_OFFSET_LO
STA OFFSLO ; store low byte of match offset
STX OFFSHI ; store high byte of match offset
REP_MATCH
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression - subtract match offset
SEC ; subtract: dest - match offset
LDA PUTDST+1 ; low 8 bits
OFFSLO = *+1
SBC #$AA
STA COPY_MATCH_LOOP+1 ; store back reference address
LDA PUTDST+2
OFFSHI = *+1
SBC #$AA ; high 8 bits
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
SEC
} else {
; Forward decompression - add match offset
CLC ; add dest + match offset
LDA PUTDST+1 ; low 8 bits
OFFSLO = *+1
ADC #$AA
STA COPY_MATCH_LOOP+1 ; store back reference address
OFFSHI = *+1
LDA #$AA ; high 8 bits
ADC PUTDST+2
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
}
PLA ; retrieve token from stack again
AND #$07 ; isolate match len (MMM)
ADC #$01 ; add MIN_MATCH_SIZE_V2 and carry
CMP #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
BCC PREPARE_COPY_MATCH ; if less, length is directly embedded in token
JSR GETNIBBLE ; get extra match length nibble
; add nibble to len from token
ADC #$08 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2) minus carry
CMP #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
BCC PREPARE_COPY_MATCH ; if less, match length is complete
JSR GETSRC ; get extra byte of variable match length
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$E8 ; overflow?
PREPARE_COPY_MATCH
TAX
BCC PREPARE_COPY_MATCH_Y ; if not, the match length is complete
BEQ DECOMPRESSION_DONE ; if EOD code, bail
; Handle 16 bits match length
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
PREPARE_COPY_MATCH_Y
TXA
BEQ COPY_MATCH_LOOP
INY
COPY_MATCH_LOOP
LDA $AAAA ; get one byte of backreference
JSR PUTDST ; copy to destination
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- put backreference bytes backward
LDA COPY_MATCH_LOOP+1
BNE GETMATCH_DONE
DEC COPY_MATCH_LOOP+2
GETMATCH_DONE
DEC COPY_MATCH_LOOP+1
} else {
; Forward decompression -- put backreference bytes forward
INC COPY_MATCH_LOOP+1
BNE GETMATCH_DONE
INC COPY_MATCH_LOOP+2
GETMATCH_DONE
}
DEX
BNE COPY_MATCH_LOOP
DEY
BNE COPY_MATCH_LOOP
JMP DECODE_TOKEN
GETCOMBINEDBITS
EOR #$80
ASL
PHP
JSR GETNIBBLE ; get nibble into bits 0-3 (for offset bits 1-4)
PLP ; merge Z bit as the carry bit (for offset bit 0)
COMBINEDBITZ
ROL ; nibble -> bits 1-4; carry(!Z bit) -> bit 0 ; carry cleared
DECOMPRESSION_DONE
RTS
GETNIBBLE
NIBBLES = *+1
LDA #$AA
LSR NIBCOUNT
BCS HAS_NIBBLES
INC NIBCOUNT
JSR GETSRC ; get 2 nibbles
STA NIBBLES
LSR
LSR
LSR
LSR
SEC
HAS_NIBBLES
AND #$0F ; isolate low 4 bits of nibble
RTS
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- get and put bytes backward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA
LDA PUTDST+1
BNE PUTDST_DONE
DEC PUTDST+2
PUTDST_DONE
DEC PUTDST+1
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA
PHA
LDA GETSRC+1
BNE GETSRC_DONE
DEC GETSRC+2
GETSRC_DONE
DEC GETSRC+1
PLA
RTS
} else {
; Forward decompression -- get and put bytes forward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA
INC PUTDST+1
BNE PUTDST_DONE
INC PUTDST+2
PUTDST_DONE
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA
INC GETSRC+1
BNE GETSRC_DONE
INC GETSRC+2
GETSRC_DONE
RTS
}
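
The LZSA2 offset decoding above follows the xyz mode table shown earlier (00Z: 5-bit, 01Z: 9-bit, 10Z: 13-bit, 110: 16-bit, 111: repeat the previous offset), with the token's Z bit stored inverted (bit 0 for the 5-bit mode, bit 8 for the 9- and 13-bit modes) and the 13-bit mode biased by -512. Here is a C sketch of the same decision tree, assuming two hypothetical helpers, get_next_nibble() and read_byte(); names are illustrative and the result is the usual negative 16-bit displacement.

#include <stdint.h>

/* Assumed helpers: next stream nibble / next stream byte (prototypes only). */
extern uint8_t get_next_nibble(void);
extern uint8_t read_byte(void);

/* Sketch of LZSA2 match-offset decoding; illustrative only. */
static int16_t decode_offset(uint8_t token, int16_t prev_offset)
{
    unsigned x = (token >> 7) & 1, y = (token >> 6) & 1, z = (token >> 5) & 1;
    unsigned off;

    if (!x && !y) {                    /* 00Z: 5-bit offset */
        off = 0xFFE0u | ((unsigned)get_next_nibble() << 1) | (z ^ 1u);
    } else if (!x) {                   /* 01Z: 9-bit offset */
        unsigned hi = 0xFEu | (z ^ 1u);
        off = (hi << 8) | read_byte();
    } else if (!y) {                   /* 10Z: 13-bit offset, biased by -512 */
        unsigned hi = (0xE0u | ((unsigned)get_next_nibble() << 1) | (z ^ 1u)) - 2u;
        off = (hi << 8) | read_byte();
    } else if (!z) {                   /* 110: 16-bit offset, high byte first */
        unsigned hi = read_byte();
        off = (hi << 8) | read_byte();
    } else {                           /* 111: reuse the previous offset */
        return prev_offset;
    }
    return (int16_t)off;               /* two's-complement negative displacement */
}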

asm/6809/unlzsa1.s Normal file

@ -0,0 +1,104 @@
; unlzsa1.s - 6809 decompression routine for raw LZSA1 - 110 bytes
; compress with lzsa -r <original_file> <compressed_file>
;
; in: x = start of compressed data
; y = start of decompression buffer
; out: y = end of decompression buffer + 1
;
; Copyright (C) 2020 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
decompress_lzsa1 equ lz1token
lz1bigof lda ,x+ ; O set: load MSB of 16-bit (negative, signed) offset
lz1gotof leau d,y ; put backreference start address in U (dst+offset)
puls b ; restore token
clra ; clear A (high part of match length)
andb #$0F ; isolate MMMM (embedded match length)
addb #$03 ; add MIN_MATCH_SIZE
cmpb #$12 ; MATCH_RUN_LEN?
bne lz1gotln ; no, we have the full match length, go copy
addb ,x+ ; add extra match length byte + MIN_MATCH_SIZE + MATCH_RUN_LEN
bcc lz1gotln ; if no overflow, we have the full length
bne lz1midln
ldb ,x+ ; load 16-bit len in D (low part in B, high in A)
lda ,x+ ; (little endian)
bne lz1gotln ; check if we hit EOD (16-bit length = 0)
tstb
bne lz1gotln ; go copy matched bytes if not
rts ; done, bail
lz1midln tfr b,a ; copy high part of len into A
ldb ,x+ ; grab low 8 bits of len in B
lz1gotln pshs x ; save source compressed data pointer
tfr d,x ; copy match length to X
lz1cpymt lda ,u+ ; copy matched byte
sta ,y+
leax -1,x ; decrement X
bne lz1cpymt ; loop until all matched bytes are copied
puls x ; restore source compressed data pointer
lz1token ldb ,x+ ; load next token into B: O|LLL|MMMM
pshs b ; save it
andb #$70 ; isolate LLL (embedded literals count) in B
beq lz1nolt ; skip if no literals
cmpb #$70 ; LITERALS_RUN_LEN?
bne lz1declt ; if not, we have the complete count, go unshift
ldb ,x+ ; load extra literals count byte
addb #$07 ; add LITERALS_RUN_LEN
bcc lz1gotla ; if no overflow, we got the complete count, copy
bne lz1midlt
ldb ,x+ ; load low 8 bits of little-endian literals count
lda ,x+ ; load high 8 bits of literal count
bra lz1gotlt ; we now have the complete count, go copy
lz1midlt tfr b,a ; copy high part of literals count into A
ldb ,x+ ; load low 8 bits of literals count
bra lz1gotlt ; we now have the complete count, go copy
lz1declt lsrb ; shift literals count into place
lsrb
lsrb
lsrb
lz1gotla clra ; clear A (high part of literals count)
lz1gotlt tfr x,u
tfr d,x ; transfer 16-bit count into X
lz1cpylt lda ,u+ ; copy literal byte
sta ,y+
leax -1,x ; decrement X and update Z flag
bne lz1cpylt ; loop until all literal bytes are copied
tfr u,x
lz1nolt ldb ,x+ ; load either 8-bit or LSB 16-bit offset (negative, signed)
lda ,s ; get token again, don't pop it from the stack
bmi lz1bigof ; test O bit (small or large offset)
lda #$ff ; set high 8 bits
bra lz1gotof

asm/6809/unlzsa1b.s Normal file

@ -0,0 +1,106 @@
; unlzsa1b.s - 6809 backward decompression routine for raw LZSA1 - 113 bytes
; compress with lzsa -r -b <original_file> <compressed_file>
;
; in: x = last byte of compressed data
; y = last byte of decompression buffer
; out: y = first byte of decompressed data
;
; Copyright (C) 2020 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
decompress_lzsa1
leax 1,x
leay 1,y
bra lz1token
lz1bigof ldd ,--x ; O set: load long 16 bit (negative, signed) offset
lz1gotof nega ; reverse sign of offset in D
negb
sbca #0
leau d,y ; put backreference start address in U (dst+offset)
puls b ; restore token
clra ; clear A (high part of match length)
andb #$0F ; isolate MMMM (embedded match length)
addb #$03 ; add MIN_MATCH_SIZE
cmpb #$12 ; MATCH_RUN_LEN?
bne lz1gotln ; no, we have the full match length, go copy
addb ,-x ; add extra match length byte + MIN_MATCH_SIZE + MATCH_RUN_LEN
bcc lz1gotln ; if no overflow, we have the full length
bne lz1midln
ldd ,--x ; load 16-bit len in D (low part in B, high in A)
bne lz1gotln ; check if we hit EOD (16-bit length = 0)
rts ; done, bail
lz1midln tfr b,a ; copy high part of len into A
ldb ,-x ; grab low 8 bits of len in B
lz1gotln pshs x ; save source compressed data pointer
tfr d,x ; copy match length to X
lz1cpymt lda ,-u ; copy matched byte
sta ,-y
leax -1,x ; decrement X
bne lz1cpymt ; loop until all matched bytes are copied
puls x ; restore source compressed data pointer
lz1token ldb ,-x ; load next token into B: O|LLL|MMMM
pshs b ; save it
andb #$70 ; isolate LLL (embedded literals count) in B
beq lz1nolt ; skip if no literals
cmpb #$70 ; LITERALS_RUN_LEN?
bne lz1declt ; if not, we have the complete count, go unshift
ldb ,-x ; load extra literals count byte
addb #$07 ; add LITERALS_RUN_LEN
bcc lz1gotla ; if no overflow, we got the complete count, copy
bne lz1midlt
ldd ,--x ; load 16 bit count in D (low part in B, high in A)
bra lz1gotlt ; we now have the complete count, go copy
lz1midlt tfr b,a ; copy high part of literals count into A
ldb ,-x ; load low 8 bits of literals count
bra lz1gotlt ; we now have the complete count, go copy
lz1declt lsrb ; shift literals count into place
lsrb
lsrb
lsrb
lz1gotla clra ; clear A (high part of literals count)
lz1gotlt tfr x,u
tfr d,x ; transfer 16-bit count into X
lz1cpylt lda ,-u ; copy literal byte
sta ,-y
leax -1,x ; decrement X and update Z flag
bne lz1cpylt ; loop until all literal bytes are copied
tfr u,x
lz1nolt ldb ,s ; get token again, don't pop it from the stack
bmi lz1bigof ; test O bit (small or large offset)
ldb ,-x ; O clear: load 8 bit (negative, signed) offset
lda #$ff ; set high 8 bits
bra lz1gotof
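
The backward 6809 routines negate the 16-bit offset held in D with the NEGA / NEGB / SBCA #0 sequence seen in lz1gotof above: negate both halves, then fold the borrow produced by the low-byte negation into the high byte. A short C sketch of the same arithmetic on split bytes (illustrative only):

#include <stdint.h>

/* Negate a 16-bit value held as separate high/low bytes, mirroring
   NEGA / NEGB / SBCA #0 on the 6809 D register.  Sketch only. */
static void negate16(uint8_t *hi, uint8_t *lo)
{
    uint8_t borrow = (*lo != 0);    /* NEG on the low byte sets carry iff it was non-zero */
    *hi = (uint8_t)(0 - *hi);       /* NEGA */
    *lo = (uint8_t)(0 - *lo);       /* NEGB */
    *hi = (uint8_t)(*hi - borrow);  /* SBCA #0: propagate the borrow */
}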

asm/6809/unlzsa2.s Normal file

@ -0,0 +1,149 @@
; unlzsa2.s - 6809 decompression routine for raw LZSA2 - 172 bytes
; compress with lzsa -f2 -r <original_file> <compressed_file>
;
; in: x = start of compressed data
; y = start of decompression buffer
; out: y = end of decompression buffer + 1
;
; Copyright (C) 2020 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
decompress_lzsa2
lsr <lz2nibct,pcr ; reset nibble available flag
lz2token ldb ,x+ ; load next token into B: XYZ|LL|MMM
pshs b ; save it
andb #$18 ; isolate LL (embedded literals count) in B
beq lz2nolt ; skip if no literals
cmpb #$18 ; LITERALS_RUN_LEN_V2?
bne lz2declt ; if not, we have the complete count, go unshift
bsr lz2nibl ; get extra literals length nibble in B
addb #$03 ; add LITERALS_RUN_LEN_V2
cmpb #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
bne lz2gotla ; if not, we have the full literals count, go copy
addb ,x+ ; add extra literals count byte + LITERALS_RUN_LEN + 15
bcc lz2gotla ; if no overflow, we got the complete count, copy
ldb ,x+ ; load low 8 bits of little-endian literals count
lda ,x+ ; load high 8 bits of literal count
bra lz2gotlt ; we now have the complete count, go copy
lz2declt lsrb ; shift literals count into place
lsrb
lsrb
lz2gotla clra ; clear A (high part of literals count)
lz2gotlt tfr x,u
tfr d,x ; transfer 16-bit count into X
lz2cpylt lda ,u+ ; copy literal byte
sta ,y+
leax -1,x ; decrement X and update Z flag
bne lz2cpylt ; loop until all literal bytes are copied
tfr u,x
lz2nolt ldb ,s ; get token again, don't pop it from the stack
lslb ; push token's X flag bit into carry
bcs lz2replg ; if token's X bit is set, rep or large offset
lslb ; push token's Y flag bit into carry
sex ; push token's Z flag bit into reg A (carry flag is not affected)
bcs lz2offs9 ; if token's Y bit is set, 9 bits offset
bsr lz2nibl ; get offset nibble in B
lsla ; retrieve token's Z flag bit and push into carry
rolb ; shift Z flag from carry into bit 0 of B
eorb #$e1 ; set bits 5-7 of offset, reverse bit 0
sex ; set bits 8-15 of offset to $FF
bra lz2gotof
lz2offs9 deca ; set bits 9-15 of offset, reverse bit 8
ldb ,x+ ; load low 8 bits of (negative, signed) offset
bra lz2gotof
lz2nibct fcb $00 ; nibble ready flag
lz2nibl ldb #$aa
lsr <lz2nibct,pcr ; nibble ready?
bcs lz2gotnb
inc <lz2nibct,pcr ; flag nibble as ready for next time
ldb ,x+ ; load two nibbles
stb <lz2nibl+1,pcr ; store nibble for next time (low 4 bits)
lsrb ; shift 4 high bits of nibble down
lsrb
lsrb
lsrb
lz2gotnb andb #$0f ; only keep low 4 bits
lz2done rts
lz2replg lslb ; push token's Y flag bit into carry
bcs lz2rep16 ; if token's Y bit is set, rep or 16 bit offset
sex ; push token's Z flag bit into reg A
bsr lz2nibl ; get offset nibble in B
lsla ; push token's Z flag bit into carry
rolb ; shift Z flag from carry into bit 0 of B
eorb #$e1 ; set bits 13-15 of offset, reverse bit 8
tfr b,a ; copy bits 8-15 of offset into A
suba #$02 ; subtract 512 from offset
ldb ,x+ ; load low 8 bits of (negative, signed) offset
bra lz2gotof
lz2rep16 bmi lz2repof ; if token's Z flag bit is set, rep match
ldd ,x++ ; load high then low 8 bits of offset
lz2gotof std <lz2repof+2,pcr ; store match offset
lz2repof leau $aaaa,y ; put backreference start address in U (dst+offset)
puls b ; restore token
clra ; clear A (high part of match length)
andb #$07 ; isolate MMM (embedded match length)
addb #$02 ; add MIN_MATCH_SIZE_V2
cmpb #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
bne lz2gotln ; no, we have the full match length, go copy
bsr lz2nibl ; get extra match length nibble in B
addb #$09 ; add MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2
cmpb #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
bne lz2gotln ; if not, we have the full match length, go copy
addb ,x+ ; add extra length byte + MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15
bcc lz2gotln ; if no overflow, we have the full length
beq lz2done ; detect EOD code
ldb ,x+ ; load 16-bit len in D (low part in B, high in A)
lda ,x+ ; (little endian)
lz2gotln pshs x ; save source compressed data pointer
tfr d,x ; copy match length to X
lz2cpymt lda ,u+ ; copy matched byte
sta ,y+
leax -1,x ; decrement X
bne lz2cpymt ; loop until all matched bytes are copied
puls x ; restore source compressed data pointer
lbra lz2token ; go decode next token

asm/6809/unlzsa2b.s Normal file

@ -0,0 +1,154 @@
; unlzsa2b.s - 6809 backward decompression routine for raw LZSA2 - 174 bytes
; compress with lzsa -f2 -r -b <original_file> <compressed_file>
;
; in: x = last byte of compressed data
; y = last byte of decompression buffer
; out: y = first byte of decompressed data
;
; Copyright (C) 2020 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
decompress_lzsa2
lsr <lz2nibct,pcr ; reset nibble available flag
leax 1,x
leay 1,y
lz2token ldb ,-x ; load next token into B: XYZ|LL|MMM
pshs b ; save it
andb #$18 ; isolate LL (embedded literals count) in B
beq lz2nolt ; skip if no literals
cmpb #$18 ; LITERALS_RUN_LEN_V2?
bne lz2declt ; if not, we have the complete count, go unshift
bsr lz2nibl ; get extra literals length nibble in B
addb #$03 ; add LITERALS_RUN_LEN_V2
cmpb #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
bne lz2gotla ; if not, we have the full literals count, go copy
addb ,-x ; add extra literals count byte + LITERALS_RUN_LEN + 15
bcc lz2gotla ; if no overflow, we got the complete count, copy
ldd ,--x ; load 16 bit count in D (low part in B, high in A)
bra lz2gotlt ; we now have the complete count, go copy
lz2declt lsrb ; shift literals count into place
lsrb
lsrb
lz2gotla clra ; clear A (high part of literals count)
lz2gotlt tfr x,u
tfr d,x ; transfer 16-bit count into X
lz2cpylt lda ,-u ; copy literal byte
sta ,-y
leax -1,x ; decrement X and update Z flag
bne lz2cpylt ; loop until all literal bytes are copied
tfr u,x
lz2nolt ldb ,s ; get token again, don't pop it from the stack
lslb ; push token's X flag bit into carry
bcs lz2replg ; if token's X bit is set, rep or large offset
lslb ; push token's Y flag bit into carry
sex ; push token's Z flag bit into reg A (carry flag is not affected)
bcs lz2offs9 ; if token's Y bit is set, 9 bits offset
bsr lz2nibl ; get offset nibble in B
lsla ; retrieve token's Z flag bit and push into carry
rolb ; shift Z flag from carry into bit 0 of B
eorb #$e1 ; set bits 5-7 of offset, reverse bit 0
sex ; set bits 8-15 of offset to $FF
bra lz2gotof
lz2offs9 deca ; set bits 9-15 of offset, reverse bit 8
bra lz2lowof
lz2nibct fcb $00 ; nibble ready flag
lz2nibl ldb #$aa
lsr <lz2nibct,pcr ; nibble ready?
bcs lz2gotnb
inc <lz2nibct,pcr ; flag nibble as ready for next time
ldb ,-x ; load two nibbles
stb <lz2nibl+1,pcr ; store nibble for next time (low 4 bits)
lsrb ; shift 4 high bits of nibble down
lsrb
lsrb
lsrb
lz2gotnb andb #$0f ; only keep low 4 bits
lz2done rts
lz2replg lslb ; push token's Y flag bit into carry
bcs lz2rep16 ; if token's Y bit is set, rep or 16 bit offset
sex ; push token's Z flag bit into reg A
bsr lz2nibl ; get offset nibble in B
lsla ; retrieve token's Z flag bit and push into carry
rolb ; shift Z flag from carry into bit 0 of B
eorb #$e1 ; set bits 13-15 of offset, reverse bit 8
tfr b,a ; copy bits 8-15 of offset into A
suba #$02 ; subtract 512 from offset
bra lz2lowof
lz2rep16 bmi lz2repof ; if token's Z flag bit is set, rep match
lda ,-x ; load high 8 bits of (negative, signed) offset
lz2lowof ldb ,-x ; load low 8 bits of offset
lz2gotof nega ; reverse sign of offset in D
negb
sbca #0
std <lz2repof+2,pcr ; store match offset
lz2repof leau $aaaa,y ; put backreference start address in U (dst+offset)
puls b ; restore token
clra ; clear A (high part of match length)
andb #$07 ; isolate MMM (embedded match length)
addb #$02 ; add MIN_MATCH_SIZE_V2
cmpb #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
bne lz2gotln ; no, we have the full match length, go copy
bsr lz2nibl ; get extra match length nibble in B
addb #$09 ; add MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2
cmpb #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
bne lz2gotln ; if not, we have the full match length, go copy
addb ,-x ; add extra length byte + MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15
bcc lz2gotln ; if no overflow, we have the full length
beq lz2done ; detect EOD code
ldd ,--x ; load 16-bit len in D (low part in B, high in A)
lz2gotln pshs x ; save source compressed data pointer
tfr d,x ; copy match length to X
lz2cpymt lda ,-u ; copy matched byte
sta ,-y
leax -1,x ; decrement X
bne lz2cpymt ; loop until all matched bytes are copied
puls x ; restore source compressed data pointer
lbra lz2token ; go decode next token

asm/8088/LZSA1FTA.ASM Normal file

@ -0,0 +1,250 @@
; lzsa1fta.asm time-efficient decompressor implementation for 8088
; Turbo Assembler IDEAL mode dialect; can also be assembled with NASM.
;
; Usual DOS assembler SMALL model assumptions apply. This code:
; - Assumes it was invoked via NEAR call (change RET to RETF for FAR calls)
; - Is interrupt-safe
; - Is not re-entrant (do not decompress while already running decompression)
; - Trashes all data and segment registers
;
; Copyright (C) 2019 Jim Leonard, Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
IDEAL
P8086
SEGMENT CODE para public
ASSUME cs:CODE, ds:CODE
PUBLIC lzsa1_decompress_speed
; ---------------------------------------------------------------------------
; Decompress raw LZSA1 block
; inputs:
; * ds:si: raw LZSA1 block
; * es:di: output buffer
; output:
; * ax: decompressed size
; ---------------------------------------------------------------------------
; Must declare this in the code segment:
SHR4table:
DB 00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h
DB 01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h
DB 02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h
DB 03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h
DB 04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h
DB 05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h
DB 06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h
DB 07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h
PROC lzsa1_decompress_speed NEAR
lzsa1_start:
push di ;remember decompression offset
cld ;ensure string ops move forward
mov bx,offset SHR4table
xor cx,cx
@@decode_token:
xchg cx,ax ;clear ah (cx = 0 from match copy's rep movsb)
lodsb ;read token byte: O|LLL|MMMM
mov dx,ax ;copy our token to dl for later MMMM handling
and al,070H ;isolate literals length in token (LLL)
jz @@check_offset_size ;if LLL=0, we have no literals; goto match
cmp al,070H ;LITERALS_RUN_LEN?
jne @@got_literals ;no, we have full count from token; go copy
lodsb ;grab extra length byte
add al,07H ;add LITERALS_RUN_LEN
jnc @@got_literals_exact ;if no overflow, we have full count
je @@big_literals
@@mid_literals:
lodsb ;grab single extra length byte
inc ah ;add 256
xchg cx,ax ;with longer counts, we can save some time
shr cx,1 ;by doing a word copy instead of a byte copy.
rep movsw ;We don't need to account for overlap because
adc cx,0 ;source for literals isn't the output buffer.
rep movsb
jmp @@check_offset_size
@@big_literals:
lodsw ;grab 16-bit extra length
xchg cx,ax ;with longer counts, we can save some time
shr cx,1 ;by doing a word copy instead of a byte copy.
rep movsw
adc cx,0
rep movsb
jmp @@check_offset_size
@@got_literals:
segcs xlat ;shift literals length into place
@@got_literals_exact:
xchg cx,ax
rep movsb ;copy cx literals from ds:si to es:di
@@check_offset_size:
test dl,dl ;check match offset size in token (O bit)
js @@get_long_offset ;load absolute 16-bit match offset
mov ah,0ffh ;set up high byte
lodsb ;load low byte
@@get_match_length:
xchg dx,ax ;dx: match offset ax: original token
and al,0FH ;isolate match length in token (MMMM)
cmp al,0FH ;MATCH_RUN_LEN?
jne @@got_matchlen_short ;no, we have the full match length from the token, go copy
lodsb ;grab extra length byte
add al,012H ;add MIN_MATCH_SIZE + MATCH_RUN_LEN
jnc @@do_long_copy ;if no overflow, we have the entire length
jne @@mid_matchlen
lodsw ;grab 16-bit length
xchg cx,ax ;get ready to do a long copy
jcxz @@done_decompressing ;wait, is it the EOD marker? Exit if so
jmp @@copy_len_preset ;otherwise, do the copy
@@get_long_offset:
lodsw ;Get 2-byte match offset
jmp @@get_match_length
@@got_matchlen_short:
add al,3 ;add MIN_MATCH_SIZE
xchg cx,ax ;copy match length into cx
mov bp,ds ;save ds
mov ax,es
mov ds,ax ;ds=es
xchg ax,si ;save si
mov si,di ;ds:si now points at back reference in output data
add si,dx
rep movsb ;copy match
xchg si,ax ;restore si
mov ds,bp ;restore ds
jmp @@decode_token ;go decode another token
@@done_decompressing:
pop ax ;retrieve the original decompression offset
xchg di,ax ;compute decompressed size
sub ax,di
ret ;done decompressing, exit to caller
;With a confirmed longer match length, we have an opportunity to optimize for
;the case where a single byte is repeated long enough that we can benefit
;from rep movsw to perform the run (instead of rep movsb).
@@mid_matchlen:
lodsb ;grab single extra length byte
inc ah ;add 256
@@do_long_copy:
xchg cx,ax ;copy match length into cx
@@copy_len_preset:
push ds ;save ds
mov bp,es
mov ds,bp ;ds=es
mov bp,si ;save si
mov si,di ;ds:si now points at back reference in output data
add si,dx
cmp dx,-2 ;do we have a byte/word run to optimize?
jae @@do_run ;perform a run
;You may be tempted to change "jae" to "jge" because DX is a signed number.
;Don't! The total window is 64k, so if you treat this as a signed comparison,
;you will get incorrect results for offsets over 32K.
;If we're here, we have a long copy and it isn't byte-overlapping (if it
;overlapped, we'd be in @@do_run_1) So, let's copy faster with REP MOVSW.
;This won't affect 8088 that much, but it speeds up 8086 and higher.
shr cx,1
rep movsw
adc cx,0
rep movsb
mov si,bp ;restore si
pop ds
jmp @@decode_token ;go decode another token
@@do_run:
je @@do_run_2 ;fall through to byte (common) if not word run
@@do_run_1:
lodsb ;load first byte of run into al
mov ah,al
shr cx,1
rep stosw ;perform word run
adc cx,0
rep stosb ;finish word run
mov si,bp ;restore si
pop ds
jmp @@decode_token ;go decode another token
@@do_run_2:
lodsw ;load first word of run
shr cx,1
rep stosw ;perform word run
adc cx,0 ;despite 2-byte offset, compressor might
rep stosb ;output odd length. better safe than sorry.
mov si,bp ;restore si
pop ds
jmp @@decode_token ;go decode another token
ENDP lzsa1_decompress_speed
ENDS CODE
END
;Speed optimization history (decompression times in microseconds @ 4.77 MHz):
; original E. Marty code shuttle 123208 alice 65660 robotron 407338 ***
; table for shr al,4 shuttle 120964 alice 63230 robotron 394733 +++
; push/pop to mov/mov shuttle 118176 alice 61835 robotron 386762 +++
; movsw for literalcpys shuttle 124102 alice 64908 robotron 400220 --- rb
; stosw for byte runs shuttle 118897 alice 65040 robotron 403518 --- rb
; better stosw for runs shuttle 117712 alice 65040 robotron 403343 +--
; disable RLE by default shuttle 116924 alice 60783 robotron 381226 +++
; optimize got_matchlen shuttle 115294 alice 59588 robotron 374330 +++
; fall through to getML shuttle 113258 alice 59572 robotron 372004 +++
; fall through to midLI shuttle 113258 alice 59572 robotron 375060 ..- rb
; fall through midMaLen shuttle 113247 alice 59572 robotron 372004 +.+
; movsw for litlen > 255 shuttle 113247 alice 59572 robotron 371612 ..+
; rep stosw for long runs shuttle 113247 alice 59572 robotron 371612 ...
; rep movsw for long cpys shuttle 113247 alice 59572 robotron 371035 ..+
; xchg/dec ah -> mov ah,val shuttle 112575 alice 59272 robotron 369198 +++
; force >12h len.to longcpy shuttle 101998 alice 59266 robotron 364459 +.+
; more efficient run branch shuttle 102239 alice 59297 robotron 364716 --- rb
; even more eff. run branch shuttle 101998 alice 59266 robotron 364459 ***
; BUGFIX - bad sign compare shuttle 101955 alice 59225 robotron 364117 +++
; reverse 16-bit len compar shuttle 102000 alice 59263 robotron 364460 --- rb
; jcxz for EOD detection no change to speed, but is 1 byte shorter +++
; force movsw for literals shuttle 107183 alice 62555 robotron 379524 --- rb
; defer shr4 until necessry shuttle 102069 alice 60236 robotron 364096 ---
; skip literals if LLL=0 shuttle 98655 alice 57849 robotron 363358 ---
; fall through to mid_liter shuttle 98595 alice 57789 robotron 361998 +++
; == jumptable experiments begin ==
; jumptable for small copys shuttle 101594 alice 61078 robotron 386018 ---
; start:xchg instead of mov shuttle 100948 alice 60467 robotron 381112 +++
; use table for LLL=0 check shuttle 106972 alice 63333 robotron 388304 --- rb
; jmptbl to fallthrough mov shuttle 102532 alice 60760 robotron 383070 ---
; cpy fallthrough check_ofs shuttle 98939 alice 58917 robotron 371019 +**
; single jumptable jump shuttle 97528 alice 57264 robotron 362194 ++*
; conditional check for L=7 shuttle 98610 alice 58521 robotron 368153 --- rb
; rip out the jumptable :-/ shuttle 97616 alice 57128 robotron 360697 +++
; defer add MIN_MATCH_SIZE shuttle 97250 alice 57004 robotron 361191 ++?
; cache constants in regs shuttle 104681 alice 59939 robotron 380125 --- rb
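
Several of the copy paths above use the SHR CX,1 / REP MOVSW / ADC CX,0 / REP MOVSB idiom: copy half as many 16-bit words, then let the bit shifted out of CX (the carry) decide whether one trailing byte remains. A C sketch of that byte-count split, modelling only the non-overlapping literal copies (illustrative names, no overlap handling):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Sketch of the word-copy-plus-odd-byte split; illustrative only. */
static void copy_words_then_odd_byte(uint8_t *dst, const uint8_t *src, size_t count)
{
    size_t words = count >> 1;            /* SHR CX,1: halve the byte count    */
    size_t odd   = count & 1;             /* the shifted-out bit is the carry  */
    memcpy(dst, src, words * 2);          /* REP MOVSW                         */
    if (odd)                              /* ADC CX,0 restores it ...          */
        dst[words * 2] = src[words * 2];  /* ... and REP MOVSB copies the byte */
}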

asm/8088/LZSA1JMP.ASM Normal file

@ -0,0 +1,581 @@
; lzsa1jmp.asm time-efficient decompressor implementation for 808x CPUs.
; Turbo Assembler IDEAL mode dialect.
; (Is supposed to also assemble with NASM's IDEAL mode support, but YMMV.)
;
; This code assembles to about 3K of lookup tables and unrolled code,
; but the tradeoff for that size is the absolute fastest decompressor
; of LZSA1 block data for 808x CPUs.
; If you need moderately fast code with less size, see LZSA1FTA.ASM.
; If you need the smallest decompression code, see decompress_small_v1.S.
;
; Usual DOS assembler SMALL model assumptions apply. This code:
; - Assumes it was invoked via NEAR call (change RET to RETF for FAR calls)
; - Is interrupt-safe
; - Is not re-entrant (do not decompress while already running decompression)
; - Trashes all data and segment registers
;
; Copyright (C) 2019 Jim Leonard, Emmanuel Marty
; Additional speed optimizations by Pavel Zagrebin
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
;
; ===========================================================================
;
; The key area to concentrate on when optimizing LZSA1 decompression speed is
; reducing time spent handling the shortest matches. This is for two reasons:
; 1. shorter matches are more common
; 2. short matches are least efficient in terms of decomp speed per byte
; You can confirm #1 using the --stats mode of the compressor.
;
; Branches are costly on 8086. To ensure we branch as little as possible, a
; jumptable will be used to branch directly to as many direct decode paths as
; possible. This will burn up 512 bytes of RAM for a jumptable, and a few
; hundred bytes of duplicated program code (rather than JMP/CALL common code
; blocks, we inline them to avoid the branch overhead).
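;
; As an illustration (token value chosen arbitrarily, not from the format spec):
; the token byte itself is the table index, so a token of 47h (O=0, LLL=4,
; MMMM=7) is dispatched through jtbl + 47h*2 = jtbl + 8Eh, which points at the
; l4ml1 entry below - the decode path that copies 4 literals, reads a 1-byte
; match offset and then copies a short match.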
;
; ===========================================================================
;
; === LZSA1 block reference:
;
; Blocks encoded as LZSA1 are composed from consecutive commands.
; Each command follows this format:
;
; token: <O|LLL|MMMM>
; optional extra literal length
; literal values
; match offset low
; optional match offset high
; optional extra encoded match length
;
;
; === LZSA1 Token Reference:
;
; 7 6 5 4 3 2 1 0
; O L L L M M M M
;
; L: 3-bit literals length (0-6, or 7 if extended). If the number of literals for
; this command is 0 to 6, the length is encoded in the token and no extra bytes
; are required. Otherwise, a value of 7 is encoded and extra bytes follow as
; 'optional extra literal length'
;
; M: 4-bit encoded match length (0-14, or 15 if extended). Likewise, if the
; encoded match length for this command is 0 to 14, it is directly stored,
; otherwise 15 is stored and extra bytes follow as 'optional extra encoded match
; length'. Except for the last command in a block, a command always contains a
; match, so the encoded match length is the actual match length, offset by the
; minimum, which is 3 bytes. For instance, an actual match length of 10 bytes to
; be copied is encoded as 7.
;
; O: set for a 2-byte match offset, clear for a 1-byte match offset
;
;
; === Decoding extended literal length:
;
; If the literals length is 7 or more, then an extra byte follows here, with
; three possible values:
;
; 0-248: the value is added to the 7 stored in the token.
; 250: a second byte follows. The final literals value is 256 + the second byte.
; 249: a little-endian 16-bit value follows, forming the final literals value.
;
;
; === Decoding match offsets:
;
; match offset low: The low 8 bits of the match offset follows.
;
; optional match offset high: If the 'O' bit (bit 7) is set in the token, the
; high 8 bits of the match offset follow, otherwise they are understood to be all
; set to 1. For instance, a short offset of 0x70 is interpreted as 0xff70
;
;
; === Decoding extra encoded match length:
;
; optional extra encoded match length: If the encoded match length is 15 or more,
; the 'M' bits in the token form the value 15, and an extra byte follows here,
; with three possible types of value.
;
; 0-237: the value is added to the 15 stored in the token.
; The final value is 3 + 15 + this byte.
; 239: a second byte follows. The final match length is 256 + the second byte.
; 238: a second and third byte follow, forming a little-endian 16-bit value.
; The final encoded match length is that 16-bit value.
;
; ===========================================================================
IDEAL ; Use Turbo Assembler IDEAL syntax checking
P8086 ; Restrict code generation to the 808x and later
JUMPS ; Perform fixups for out-of-bound conditional jumps
; This is required for the (L=07 & M=0Fh) decode paths as they
; have the most code, but these are uncommon paths so the
; tiny speed loss in just these paths is not a concern.
;Setting OPTIMIZE_LONG_RLE to 1 speeds up decompressing long runs of the
;same 16-bit word value, but hurts decompression speed of other data
;types slightly. Turn this on if you know your data has very long 16-bit
;word-based runs (reported as RLE2 sequences in the LZSA compressor output
;with an average length of at least 32 bytes), otherwise leave it off.
OPTIMIZE_LONG_RLE EQU 0
SEGMENT CODE para public
ASSUME cs:CODE, ds:CODE
PUBLIC lzsa1_decompress_speed_jumptable
; EQU helper statements (so we can construct a jump table without going crazy)
minmatch EQU 3
litrunlen EQU 7
leml1 EQU OFFSET lit_ext_mat_len_1b
leme1 EQU OFFSET lit_ext_mat_ext_1b
leml2 EQU OFFSET lit_ext_mat_len_2b
leme2 EQU OFFSET lit_ext_mat_ext_2b
;short-circuit special cases for 0 through 6 literal copies:
l6ml1 EQU OFFSET lit_len_mat_len_1b_6
l6me1 EQU OFFSET lit_len_mat_ext_1b
l6ml2 EQU OFFSET lit_len_mat_len_2b_6
l6me2 EQU OFFSET lit_len_mat_ext_2b
l5ml1 EQU OFFSET lit_len_mat_len_1b_45
l5me1 EQU OFFSET lit_len_mat_ext_1b + 1
l5ml2 EQU OFFSET lit_len_mat_len_2b_45
l5me2 EQU OFFSET lit_len_mat_ext_2b + 1
l4ml1 EQU OFFSET lit_len_mat_len_1b_45 + 1
l4me1 EQU OFFSET lit_len_mat_ext_1b + 2
l4ml2 EQU OFFSET lit_len_mat_len_2b_45 + 1
l4me2 EQU OFFSET lit_len_mat_ext_2b + 2
l3ml1 EQU OFFSET lit_len_mat_len_1b_23
l3me1 EQU OFFSET lit_len_mat_ext_1b + 3
l3ml2 EQU OFFSET lit_len_mat_len_2b_23
l3me2 EQU OFFSET lit_len_mat_ext_2b + 3
l2ml1 EQU OFFSET lit_len_mat_len_1b_23 + 1
l2me1 EQU OFFSET lit_len_mat_ext_1b + 4
l2ml2 EQU OFFSET lit_len_mat_len_2b_23 + 1
l2me2 EQU OFFSET lit_len_mat_ext_2b + 4
l1ml1 EQU OFFSET lit_len_mat_len_1b_01
l1me1 EQU OFFSET lit_len_mat_ext_1b + 5
l1ml2 EQU OFFSET lit_len_mat_len_2b_01
l1me2 EQU OFFSET lit_len_mat_ext_2b + 5
l0ml1 EQU OFFSET lit_len_mat_len_1b_01 + 1 ; MMMM handling comes after LLL code
l0me1 EQU OFFSET lit_len_mat_ext_1b + 6 ; MMMM handling comes after LLL code
l0ml2 EQU OFFSET lit_len_mat_len_2b_01 + 1 ; MMMM handling comes after LLL code
l0me2 EQU OFFSET lit_len_mat_ext_2b + 6 ; MMMM handling comes after LLL code
; 0 1 2 3 4 5 6 7 8 9 a b c d e f
jtbl DW l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0me1 ;0
DW l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1me1 ;1
DW l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2me1 ;2
DW l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3me1 ;3
DW l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4me1 ;4
DW l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5me1 ;5
DW l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6me1 ;6
DW leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leme1 ;7
DW l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0me2 ;8
DW l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1me2 ;9
DW l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2me2 ;a
DW l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3me2 ;b
DW l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4me2 ;c
DW l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5me2 ;d
DW l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6me2 ;e
DW leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leme2 ;f
PROC lzsa1_decompress_speed_jumptable NEAR
; ---------------------------------------------------------------------------
; Decompress raw LZSA1 block
; inputs:
; * ds:si: raw LZSA1 block
; * es:di: output buffer
; output:
; * ax: decompressed size
; ---------------------------------------------------------------------------
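; A minimal calling sketch (the data labels are illustrative, not part of this
; file; it assumes ds and es are already set up for the two buffers):
;   mov si,OFFSET packed_data            ;ds:si -> compressed LZSA1 block
;   mov di,OFFSET out_buffer             ;es:di -> destination buffer
;   call lzsa1_decompress_speed_jumptable
;   ;ax now holds the decompressed size in bytes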
MACRO get_byte_match_offset
mov ah,0ffh ;O=0, so set up offset's high byte
lodsb ;load low byte; ax=match offset
xchg bp,ax ;bp=match offset ax=00 + original token
ENDM
MACRO get_word_match_offset
lodsw ;ax=match offset
xchg bp,ax ;bp=match offset ax=00 + original token
ENDM
MACRO do_match_copy_long
LOCAL even0,even1,even2,do_run,do_run_w
; Copies a long match as optimally as possible.
; requirements: cx=length, bp=negative offset, ds:si=compdata, es:di=output
; trashes: ax, bx
; must leave cx=0 at exit
mov bx,ds ;save ds
mov ax,es
mov ds,ax ;ds=es
xchg ax,si ;save si
lea si,[bp+di] ;si = output buffer + negative match offset
cmp bp,-2 ;do we have a byte/word run to optimize?
IF OPTIMIZE_LONG_RLE
jae do_run ;catch offset = -2 or -1
ELSE
ja do_run ;catch offset = -1
ENDIF
;If we're here, we have a long copy and it isn't byte-overlapping (if it
;overlapped, we'd be in @@do_run) So, let's copy faster with REP MOVSW.
;This affects 8088 only slightly, but is a bigger win on 8086 and higher.
shr cx,1
jnc even0
movsb
even0:
rep movsw
xchg si,ax ;restore si
mov ds,bx ;restore ds
jmp decode_token
do_run:
IF OPTIMIZE_LONG_RLE
je do_run_w ;if applicable, handle word-sized value faster
ENDIF
xchg dx,ax ;save si into dx, as ax is getting trashed
lodsb ;load first byte of run into al
mov ah,al
shr cx,1
jnc even1
stosb
even1:
rep stosw ;perform word run
mov si,dx ;restore si
mov ds,bx ;restore ds
jmp decode_token
IF OPTIMIZE_LONG_RLE
do_run_w:
xchg dx,ax ;save si into dx, as ax is getting trashed
lodsw ;load first word of run
shr cx,1
rep stosw ;perform word run
jnc even2
stosb ;should be after rep stosw!
even2:
mov si,dx ;restore si
mov ds,bx ;restore ds
jmp decode_token
ENDIF
ENDM
MACRO do_match_copy
; Copies a shorter match with as little overhead as possible.
; requirements: cx=length, bp=negative offset, ds:si=compdata, es:di=output
; trashes: ax, bx
; must leave cx=0 at exit
mov bx,ds ;save ds
mov ax,es
mov ds,ax ;ds=es
xchg ax,si ;save si
lea si,[bp+di] ;si = output buffer + negative match offset
movsb
movsb
movsb ;Handle MINMATCH (instead of add cx,MINMATCH)
rep movsb
xchg si,ax ;restore si
mov ds,bx ;restore ds
jmp decode_token
ENDM
MACRO do_literal_copy
LOCAL even
; Copies a literal sequence using words.
; Meant for longer lengths; for 128 bytes or less, use REP MOVSB.
; requirements: cx=length, ds:si=compdata, es:di=output
; must leave cx=0 at exit
shr cx,1
jnc even
movsb
even:
rep movsw
ENDM
MACRO copy_small_match_len
and al,0FH ;isolate length in token (MMMM)
xchg cx,ax ;cx=match length
do_match_copy ;copy match with cx=length, bp=offset
ENDM
MACRO copy_large_match_len
LOCAL val239,val238,EOD
; Handle MMMM=Fh
; Assumptions: ah=0 from get_????_match_offset's xchg
lodsb ;grab extra match length byte
add al,0Fh+minmatch ;add MATCH_RUN_LEN + MIN_MATCH_SIZE
; jz val238 ;if zf & cf, 238: get 16-bit match length
jc val239 ;if cf, 239: get extra match length byte
xchg cx,ax ;otherwise, we have our match length
do_match_copy_long ;copy match with cx=length, bp=offset
val239:
jz val238
lodsb ;ah=0; grab single extra length byte
inc ah ;ax=256+length byte
xchg cx,ax
do_match_copy_long ;copy match with cx=length, bp=offset
val238:
lodsw ;grab 16-bit length
xchg cx,ax
jcxz EOD ;is it the EOD marker? Exit if so
do_match_copy_long ;copy match with cx=length, bp=offset
EOD:
jmp done_decompressing
ENDM
lzsa1_start:
push di ;remember decompression offset
cld ;ensure string ops move forward
xor cx,cx
decode_token:
xchg cx,ax ;clear ah (cx = 0 from match copy's REP)
lodsb ;read token byte: O|LLL|MMMM
mov bp,ax ;preserve 0+token in bp for later MMMM handling
mov bx,ax ;prep for table lookup
shl bx,1 ;adjust for offset word size
jmp [cs:jtbl+bx] ;jump directly to relevant decode path
; There are eight basic decode paths for an LZSA1 token. Each of these
; paths perform only the necessary actions to decode the token and then
; fetch the next token. This results in a lot of code duplication, but
; it is the only way to get down to two branches per token (jump to unique
; decode path, then jump back to next token) for the most common cases.
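; For example (token value chosen arbitrarily): a token of 23h (O=0, LLL=2,
; MMMM=3) dispatches one byte past lit_len_mat_len_1b_23, so the movsw copies
; the 2 literals, get_byte_match_offset reads the offset byte, and
; do_match_copy writes the 3+3=6 match bytes before jumping back to
; decode_token - two taken jumps for the whole command.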
; Path #1: LLL=0-6, MMMM=0-Eh, O=0 (1-byte match offset)
; Handle LLL=0-6 by jumping directly into # of bytes to copy (6 down to 1)
lit_len_mat_len_1b_01:
movsb
get_byte_match_offset
copy_small_match_len
lit_len_mat_len_1b_23:
movsb
movsw
get_byte_match_offset
copy_small_match_len
lit_len_mat_len_1b_45:
movsb
movsw
movsw
get_byte_match_offset
copy_small_match_len
lit_len_mat_len_1b_6:
movsw
movsw
movsw
get_byte_match_offset
copy_small_match_len
; Path #2: LLL=0-6, MMMM=Fh, O=0 (1-byte match offset)
lit_len_mat_ext_1b:
movsb
movsb
movsb
movsb
movsb
movsb
get_byte_match_offset
copy_large_match_len
; Path #3: LLL=7, MMMM=0-Eh, O=0 (1-byte match offset)
lit_ext_mat_len_1b:
; on entry: ax=0 + token, bp=ax
lodsb ;grab extra literal length byte
add al,litrunlen ;add 7h literal run length
; jz @@val249_3 ;if zf & cf, 249: get 16-bit literal length
jc @@val250_3 ;if cf, 250: get extra literal length byte
xchg cx,ax ;otherwise, we have our literal length
do_literal_copy ;this might be better as rep movsw !!! benchmark
get_byte_match_offset
copy_small_match_len
@@val250_3:
jz @@val249_3
lodsb ;ah=0; grab single extra length byte
inc ah ;ax=256+length byte
xchg cx,ax
do_literal_copy
get_byte_match_offset
copy_small_match_len
@@val249_3:
lodsw ;grab 16-bit length
xchg cx,ax
do_literal_copy
get_byte_match_offset
copy_small_match_len
; Path #4: LLL=7, MMMM=Fh, O=0 (1-byte match offset)
lit_ext_mat_ext_1b:
; on entry: ax=0 + token, bp=ax
lodsb ;grab extra literal length byte
add al,litrunlen ;add 7h literal run length
; jz @@val249_4 ;if zf & cf, 249: get 16-bit literal length
jc @@val250_4 ;if cf, 250: get extra literal length byte
xchg cx,ax ;otherwise, we have our literal length
do_literal_copy ;this might be better as rep movsw !!! benchmark
get_byte_match_offset
copy_large_match_len
@@val250_4:
jz @@val249_4
lodsb ;ah=0; grab single extra length byte
inc ah ;ax=256+length byte
xchg cx,ax
do_literal_copy
get_byte_match_offset
copy_large_match_len
@@val249_4:
lodsw ;grab 16-bit length
xchg cx,ax
do_literal_copy
get_byte_match_offset
copy_large_match_len
; Path #5: LLL=0-6, MMMM=0-Eh, O=1 (2-byte match offset)
; Handle LLL=0-6 by jumping directly into # of bytes to copy (6 down to 1)
lit_len_mat_len_2b_01:
movsb
get_word_match_offset
copy_small_match_len
lit_len_mat_len_2b_23:
movsb
movsw
get_word_match_offset
copy_small_match_len
lit_len_mat_len_2b_45:
movsb
movsw
movsw
get_word_match_offset
copy_small_match_len
lit_len_mat_len_2b_6:
movsw
movsw
movsw
get_word_match_offset
copy_small_match_len
; Path #6: LLL=0-6, MMMM=Fh, O=1 (2-byte match offset)
lit_len_mat_ext_2b:
movsb
movsb
movsb
movsb
movsb
movsb
get_word_match_offset
copy_large_match_len
; Path #7: LLL=7, MMMM=0-Eh, O=1 (2-byte match offset)
lit_ext_mat_len_2b:
; on entry: ax=0 + token, bp=ax
lodsb ;grab extra literal length byte
add al,litrunlen ;add 7h literal run length
; jz @@val249_7 ;if zf & cf, 249: get 16-bit literal length
jc @@val250_7 ;if cf, 250: get extra literal length byte
xchg cx,ax ;otherwise, we have our literal length
do_literal_copy ;this might be better as rep movsw !!! benchmark
get_word_match_offset
copy_small_match_len
@@val250_7:
jz @@val249_7
lodsb ;ah=0; grab single extra length byte
inc ah ;ax=256+length byte
xchg cx,ax
do_literal_copy
get_word_match_offset
copy_small_match_len
@@val249_7:
lodsw ;grab 16-bit length
xchg cx,ax
do_literal_copy
get_word_match_offset
copy_small_match_len
; Path #8: LLL=7, MMMM=Fh, O=1 (2-byte match offset)
lit_ext_mat_ext_2b:
; on entry: ax=0 + token, bp=ax
lodsb ;grab extra literal length byte
add al,litrunlen ;add 7h literal run length
; jz @@val249_8 ;if zf & cf, 249: get 16-bit literal length
jc @@val250_8 ;if cf, 250: get extra literal length byte
xchg cx,ax ;otherwise, we have our literal length
do_literal_copy ;this might be better as rep movsw !!! benchmark
get_word_match_offset
copy_large_match_len
@@val250_8:
jz @@val249_8
lodsb ;ah=0; grab single extra length byte
inc ah ;ax=256+length byte
xchg cx,ax
do_literal_copy
get_word_match_offset
copy_large_match_len
@@val249_8:
lodsw ;grab 16-bit length
xchg cx,ax
do_literal_copy
get_word_match_offset
copy_large_match_len
done_decompressing:
;return # of decompressed bytes in ax
pop ax ;retrieve the original decompression offset
sub di,ax ;adjust for original offset
xchg di,ax ;return adjusted value in ax
ret ;done decompressing, exit to caller
ENDP lzsa1_decompress_speed_jumptable
ENDS CODE
END
;Speed optimization history (decompression times in microseconds @ 4.77 MHz):
; defer add MIN_MATCH_SIZE shuttle 97207 alice 57200 robotron 362884 ++*
; jumptable rewrite, no RLE shuttle 97744 alice 46905 robotron 309032 -++
; adc cx,0 -> adc cl,0 shuttle 97744 alice 46893 robotron 309032 .+.!
; jumptable rewrite w/RLE shuttle 88776 alice 50433 robotron 319222 +--
; short match copies movsb shuttle 97298 alice 49769 robotron 326282 ---rb
; long match copy #1 16-bit shuttle 92490 alice 46905 robotron 308722 +*+
; long match copy #2 extraB shuttle 92464 alice 46905 robotron 308371 +.+
; long match copy #3 0f->ed shuttle 86765 alice 46864 robotron 303895 +++!
; baseline new test harness shuttle 83925 alice 37948 robotron 269002 ***
; Pavel optimizations shuttle 82225 alice 36798 robotron 261226 +++
; OPTIMIZE_LONG_RLE 1 shuttle 82242 alice 36787 robotron 261392 **-
;
;------
;
;Pavel's optimization history:
; shuttle alice robotron time in 1.193 MHz timer clocks
;baseline 19109 D9A6 570F6
;adc cl,0->adc cl,cl 19035 D9A6 56FAB
;rep movsb->shr cx,1;jnc 18FD4 D998 56F14
;cmp bp,-2->inc bp;inc bp 18F07 D999 56EA3
;jz;jc->jc 18D81 D973 56B2F
;add al,3->movsb x3 18B1E D777 56197
;more lit_len_mat tables 18A83 D341 54ACC

asm/8088/LZSA2FTA.ASM Normal file

@ -0,0 +1,302 @@
; lzsa2fta.asm - LZSA v2 time-efficient decompressor implementation for 8088
; Turbo Assembler IDEAL mode dialect; can also be assembled with NASM.
;
; Usual DOS assembler SMALL model assumptions apply. This code:
; - Assumes it was invoked via NEAR call (change RET to RETF for FAR calls)
; - Is interrupt-safe
; - Is not re-entrant (do not decompress while already running decompression)
; - Trashes all data and segment registers
;
; Copyright (C) 2019 Jim Leonard, Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
IDEAL
P8086
MODEL SMALL
CODESEG
;While LZSA2 is technically capable of generating a match offset of -2,
;this sequence never actually showed up in my LZSA2 test corpus, likely due
;to compressor optimizations and the LZSA2 format itself. If you know your
;test data will contain a match offset of -2, you can enable code to write
;out the sequence very quickly at the cost of 18 bytes of code.
HANDLE_WORD_RUN EQU 0
PUBLIC lzsa2_decompress_speed
; ---------------------------------------------------------------------------
; Decompress raw LZSA2 block
; inputs:
; * ds:si: raw LZSA2 block
; * es:di: output buffer
; output:
; * ax: decompressed size
; ---------------------------------------------------------------------------
PROC lzsa2_decompress_speed NEAR
MACRO get_nybble
LOCAL has_nybble
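; Leaves the next nybble of the compressed stream in cl. bh alternates between
; 01H and 0FFH, so a fresh byte is fetched into bl only on every other call;
; the ror bl,4 swaps which half of bl lands in the low 4 bits.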
neg bh ;nybble ready?
jns has_nybble
xchg bx,ax
lodsb ;load two nybbles
xchg bx,ax
has_nybble:
mov cl,4 ;swap 4 high and low bits of nybble
ror bl,cl
mov cl,0FH
and cl,bl
ENDM
lzsa2_speed_start:
push di ;remember decompression offset
cld ;make string operations go forward
xor cx,cx
mov bx,0100H ;bx used by get_nybble
@@decode_token:
mov ax,cx ;clear ah - cx is zero (and must stay that way)
lodsb ;read token byte: XYZ|LL|MMM
mov dx,ax ;keep copy of token in dl
and al,018H ;isolate literals length in token (LL)
jz @@check_offset ;no literals? stop decoding, go to matches
;At this point, al can be in three (unshifted) states: 1, 2, or 3.
;3 = not done yet.
cmp al,(2 shl 3) ;LITERALS_RUN_LEN_V2? (original: cmp al,03h)
jb @@lit1b ;LZSA2 outputs 1-byte literals more often, so test for that first
je @@lit2b
mov cl,3
shr al,cl ;shift literals length into place
get_nybble ;cl := get extra literals length nybble
add al,cl ;add len from token to nybble
cmp al,012H ;LITERALS_RUN_LEN_V2 + 15 ?
jne @@got_literals ;if not, we have the full literals count
lodsb ;grab extra length byte
add al,012H ;overflow?
jnc @@got_literals_big ;if not, we have a big full literals count
lodsw ;grab 16-bit extra length
;For larger counts, it pays to set up a faster copy
@@got_literals_big:
xchg cx,ax
shr cx,1
rep movsw
adc cx,0
rep movsb
jmp @@check_offset
@@got_literals:
xchg cx,ax
rep movsb ;copy cx literals from ds:si to es:di
jmp @@check_offset
;LZSA2 likes to produce tiny literals of 1 or 2 bytes. Handle them here.
@@lit2b:movsb
@@lit1b:movsb
@@check_offset:
test dl,dl ;check match offset mode in token (X bit)
js @@rep_match_or_large_offset
cmp dl,040H ;check if this is a 5 or 9-bit offset (Y bit)
jnb @@offset_9_bit
;5 bit offset:
xchg cx,ax ;clear ah - cx is zero from prior rep movs
mov al,020H ;shift Z (offset bit 4) in place
and al,dl
shl al,1
shl al,1
get_nybble ;get nybble for offset bits 0-3
or al,cl ;merge nybble
rol al,1
xor al,0E1H ;set offset bits 7-5 to 1
dec ah ;set offset bits 15-8 to 1
jmp @@get_match_length
@@rep_match_or_16_bit:
test dl,020H ;test bit Z (offset bit 8)
jne @@repeat_match ;rep-match
;16 bit offset:
lodsw ;Get 2-byte match offset
xchg ah,al
jmp @@get_match_length
@@offset_9_bit:
;9 bit offset:
xchg cx,ax ;clear ah - cx is zero from prior rep movs
lodsb ;get 8 bit offset from stream in A
dec ah ;set offset bits 15-8 to 1
test dl,020H ;test bit Z (offset bit 8)
je @@get_match_length
dec ah ;Z bit is set: clear offset bit 8
jmp @@get_match_length
@@rep_match_or_large_offset:
cmp dl,0c0H ;check if this is a 13-bit offset
;or a 16-bit offset/rep match (Y bit)
jnb @@rep_match_or_16_bit
;13 bit offset:
mov ah,020H ;shift Z (offset bit 12) in place
and ah,dl
shl ah,1
shl ah,1
get_nybble ;get nybble for offset bits 8-11
or ah,cl ;merge nybble
rol ah,1
xor ah,0E1H ;set offset bits 15-13 to 1
sub ah,2 ;subtract 512
lodsb ;load match offset bits 0-7
@@get_match_length:
mov bp,ax ;bp:=offset
@@repeat_match:
mov ax,dx ;ax: original token
and al,07H ;isolate match length in token (MMM)
add al,2 ;add MIN_MATCH_SIZE_V2
cmp al,09H ;MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
jne @@got_matchlen ;no, we have full match length from token
get_nybble ;get extra match length nybble
add al,cl ;add len from token to nybble
cmp al,018H ;MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
jne @@got_matchlen ;no, we have full match length from token
lodsb ;grab extra length byte
add al,018H ;overflow?
jnc @@got_matchlen_big ;if not, we have entire (big) length
je @@done_decompressing ; detect EOD code
lodsw ;grab 16-bit length
;If we're here, we have a larger match copy and can optimize how we do that
@@got_matchlen_big:
xchg cx,ax ;copy match length into cx
mov dx,ds ;save ds
mov ax,es
mov ds,ax ;ds:=es
xchg si,ax ;dx:ax = old ds:si
mov si,di ;ds:si now points at back reference in output data
add si,bp
IF HANDLE_WORD_RUN
cmp bp,-2 ;do we have a byte/word run to optimize?
jae @@do_run ;perform a run
ELSE
cmp bp,-1 ;do we have a byte run to optimize?
je @@do_run_1 ;perform a byte run
ENDIF
;You may be tempted to change "jae" to "jge" because BP is a signed number.
;Don't! The total window is 64k, so if you treat this as a signed comparison,
;you will get incorrect results for offsets over 32K.
;
;If we're here, we have a long copy and it isn't byte-overlapping (if it
;overlapped, we'd be in @@do_run_1) So, let's copy faster with REP MOVSW.
;This won't affect 8088 that much, but it speeds up 8086 and higher.
shr cx,1
rep movsw
adc cx,0
rep movsb
xchg si,ax
mov ds,dx ;restore ds:si
jmp @@decode_token ;go decode another token
;Smaller match copies handled here:
@@got_matchlen:
xchg cx,ax ;copy match length into cx
mov dx,ds ;save ds
mov ax,es
mov ds,ax ;ds:=es
xchg si,ax ;dx:ax = old ds:si
mov si,di ;ds:si = back reference in output data
add si,bp
rep movsb ;copy match
xchg si,ax
mov ds,dx ;restore ds:si
jmp @@decode_token ;go decode another token
@@done_decompressing:
pop ax ;retrieve the original decompression offset
xchg di,ax ;compute decompressed size
sub ax,di
ret ;done
IF HANDLE_WORD_RUN
@@do_run:
je @@do_run_2 ;fall through to byte (common) if not word run
ENDIF
@@do_run_1:
push ax
lodsb ;load first byte of run into al
mov ah,al
shr cx,1
rep stosw ;perform word run
adc cx,0
rep stosb ;finish word run
pop si
mov ds,dx
jmp @@decode_token ;go decode another token
IF HANDLE_WORD_RUN
@@do_run_2:
push ax
lodsw ;load first word of run
shr cx,1
rep stosw ;perform word run
adc cx,0 ;despite 2-byte offset, compressor might
rep stosb ;output odd length. better safe than sorry.
pop si
mov ds,dx
jmp @@decode_token ;go decode another token
ENDIF
ENDP lzsa2_decompress_speed
ENDS
END
;Speed optimization history (decompression times in microseconds @ 4.77 MHz):
;Compression corpus:shuttle alice robotro rletest largetx linewar ...... ..
;Start of exercise 160828 113311 665900 238507 1053865 1004237 ******
;add al,val -> al,cl 160813 113296 668721 237484 1053604 1003815 ++-+++
;sub ah,2 -> dec dec 160907 113585 666744 237484 1056651 1005172 --+*-- rb
;mov ax,cx->xchgcxax 159741 112460 660594 237477 1046770 998323 ++++++
;unroll get_nibble 152552 106327 621119 237345 982381 942373 ++++++
;early exit if LL=0 147242 103842 615559 239318 946863 942932 +++-+-
;push/pop->mov/mov 145447 100832 604822 237288 927017 931366 ++++++
;push/pop->mov/mov(2)143214 98817 592920 239298 908217 910955 +++-++
;rep stos for -1, -2 143289 102812 617087 237164 942081 940688 ---+-- rb
;larger literal cpys 143214 98817 591940 238296 907237 909657 **++++
;larger copys & runs 132440 98802 586551 178768 904129 896709 ++++++ :-)
;smaller lit. copies 131991 99131 583933 177760 901824 898308 +-+++-
;swap smal lit compa 131828 99022 585121 177757 901793 894054 ++-*++
;compare before shif 130587 95970 569908 177753 889221 872461 +++*++
;getmatchlength base 130587 95970 570634 177753 893536 871556 ...... ===
; f->rep_match_or_16 xxxxxx xxxxx 569910 xxxxxx 889266 871435 ..+.++
; f->rep_match_or_la 129966 94748 566169 xxxxxx 880870 867030 +++.++ +++
; f->offset_9_bit 132126 95258 568869 xxxxxx 893169 870364 -++.-+
;final fallthrough 129966 94748 566169 177753 880870 865023 ******


@ -22,15 +22,15 @@
bits 16
; ---------------------------------------------------------------------------
; Decompress raw LZSA block
; Decompress raw LZSA1 block
; inputs:
; * ds:si: raw LZSA block
; * ds:si: raw LZSA1 block
; * es:di: output buffer
; output:
; * ax: decompressed size
; ---------------------------------------------------------------------------
lzsa_decompress:
lzsa1_decompress:
push di ; remember decompression offset
cld ; make string operations (lods, movs, stos..) move forward
@ -38,53 +38,76 @@ lzsa_decompress:
.decode_token:
mov ax,cx ; clear ah - cx is zero from above or from after rep movsb in .copy_match
lodsb ; read token byte: LLL|MMMM|O
lodsb ; read token byte: O|LLL|MMMM
mov dx,ax ; keep token in dl
and al,070H ; isolate literals length in token (LLL)
mov cl,4
shr al,cl ; shift literals length into place
mov cx,ax ; copy literals length into cx
cmp al,07H ; LITERALS_RUN_LEN?
jne .copy_literals ; no, we have the full literals count from the token, go copy
jne .got_literals ; no, we have the full literals count from the token, go copy
call .get_varlen ; get complete literals length
lodsb ; grab extra length byte
add al,07H ; add LITERALS_RUN_LEN
jnc .got_literals ; if no overflow, we have the full literals count, go copy
jne .mid_literals
.copy_literals:
lodsw ; grab 16-bit extra length
db 81H ; mask inc ah/lodsb
; (*like jmp short .got_literals but faster)
.mid_literals:
inc ah ; add 256
lodsb ; grab single extra length byte
.got_literals:
xchg cx,ax
rep movsb ; copy cx literals from ds:si to es:di
test dl,dl ; check match offset size in token (O bit)
js .get_long_offset
xchg ax,cx ; clear ah - cx is zero from the rep movsb above
dec cx
xchg cx,ax ; ah to 0xff - cx was zero from the rep movsb above
lodsb
jmp short .get_match_length
db 3CH ; mask lodsw
; (*like jmp short .get_match_length but faster)
.get_long_offset:
lodsw ; Get 2-byte match offset
.get_match_length:
inc ax ; the match offset is stored off-by-1, increase it
xchg ax,dx ; dx: match offset ax: original token
xchg dx,ax ; dx: match offset ax: original token
and al,0FH ; isolate match length in token (MMMM)
add al,3 ; add MIN_MATCH_SIZE
mov cx,ax ; copy match length into cx
cmp al,0FH ; MATCH_RUN_LEN?
jne .copy_match ; no, we have the full match length from the token, go copy
call .get_varlen ; get complete match length
test cx,cx
je short .done_decompressing ; bail if we hit EOD
cmp al,012H ; MATCH_RUN_LEN?
jne .got_matchlen ; no, we have the full match length from the token, go copy
.copy_match:
add cx,3 ; add MIN_MATCH_SIZE to get the final match length to copy
lodsb ; grab extra length byte
add al,012H ; add MIN_MATCH_SIZE + MATCH_RUN_LEN
jnc .got_matchlen ; if no overflow, we have the entire length
jne .mid_matchlen
lodsw ; grab 16-bit length
test ax,ax ; bail if we hit EOD
je short .done_decompressing
db 81H ; mask inc ah/lodsb
; (*like jmp short .got_literals but faster)
.mid_matchlen:
inc ah ; add 256
lodsb ; grab single extra length byte
.got_matchlen:
xchg cx,ax ; copy match length into cx
push ds ; save ds:si (current pointer to compressed data)
xchg si,ax
push es
pop ds
mov si,di ; ds:si now points at back reference in output data
sub si,dx
add si,dx
rep movsb ; copy match
xchg si,ax ; restore ds:si
pop ds
@ -95,23 +118,3 @@ lzsa_decompress:
xchg ax,di ; compute decompressed size
sub ax,di
ret ; done
.get_varlen:
lodsb ; grab extra length byte
cmp al,0FFH ; 3-byte extra length?
je .large_varlen ; yes, go grab it
add cx,ax ; add extra length byte to length from token
cmp al,0FEH ; 2-byte extra length?
jne .varlen_done ; no, we have the full length now, bail
lodsb ; grab extra length byte
add cx,ax ; add to length from token
.varlen_done:
ret ; bail
.large_varlen:
lodsw ; grab 16-bit extra length
mov cx,ax
ret

asm/8088/decompress_small_v2.S Executable file

@ -0,0 +1,176 @@
; decompress_small.S - space-efficient decompressor implementation for 8088
;
; Copyright (C) 2019 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
segment .text
bits 16
; ---------------------------------------------------------------------------
; Decompress raw LZSA2 block
; inputs:
; * ds:si: raw LZSA2 block
; * es:di: output buffer
; output:
; * ax: decompressed size
; ---------------------------------------------------------------------------
lzsa2_decompress:
push di ; remember decompression offset
cld ; make string operations (lods, movs, stos..) move forward
xor cx,cx
mov bx,0100H
xor bp,bp
.decode_token:
mov ax,cx ; clear ah - cx is zero from above or from after rep movsb in .copy_match
lodsb ; read token byte: XYZ|LL|MMM
mov dx,ax ; keep token in dl
and al,018H ; isolate literals length in token (LL)
mov cl,3
shr al,cl ; shift literals length into place
cmp al,03H ; LITERALS_RUN_LEN_V2?
jne .got_literals ; no, we have the full literals count from the token, go copy
call .get_nibble ; get extra literals length nibble
add al,cl ; add len from token to nibble
cmp al,012H ; LITERALS_RUN_LEN_V2 + 15 ?
jne .got_literals ; if not, we have the full literals count, go copy
lodsb ; grab extra length byte
add al,012H ; overflow?
jnc .got_literals ; if not, we have the full literals count, go copy
lodsw ; grab 16-bit extra length
.got_literals:
xchg cx,ax
rep movsb ; copy cx literals from ds:si to es:di
test dl,0C0h ; check match offset mode in token (X bit)
js .rep_match_or_large_offset
;;cmp dl,040H ; check if this is a 5 or 9-bit offset (Y bit)
; (the Y bit is already known from the 'test dl,0C0h' above)
xchg cx,ax ; clear ah - cx is zero from the rep movsb above
jne .offset_9_bit
; 5 bit offset
cmp dl,020H ; test bit 5
call .get_nibble_x
jmp short .dec_offset_top
.offset_9_bit: ; 9 bit offset
lodsb ; get 8 bit offset from stream in A
dec ah ; set offset bits 15-8 to 1
test dl,020H ; test bit Z (offset bit 8)
je .get_match_length
.dec_offset_top:
dec ah ; clear offset bit 8 (reached with the Z bit set)
; or, from the 5-bit path, set offset bits 15-8 to 1
jmp short .get_match_length
.rep_match_or_large_offset:
;;cmp dl,0c0H ; check if this is a 13-bit offset or a 16-bit offset/rep match (Y bit)
jpe .rep_match_or_16_bit
; 13 bit offset
cmp dl,0A0H ; test bit 5 (knowing that bit 7 is also set)
xchg ah,al
call .get_nibble_x
sub al,2 ; subtract 512
jmp short .get_match_length_1
.rep_match_or_16_bit:
test dl,020H ; test bit Z (offset bit 8)
jne .repeat_match ; rep-match
; 16 bit offset
lodsb ; Get 2-byte match offset
.get_match_length_1:
xchg ah,al
lodsb ; load match offset bits 0-7
.get_match_length:
xchg bp,ax ; bp: offset
.repeat_match:
xchg ax,dx ; ax: original token
and al,07H ; isolate match length in token (MMM)
add al,2 ; add MIN_MATCH_SIZE_V2
cmp al,09H ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
jne .got_matchlen ; no, we have the full match length from the token, go copy
call .get_nibble ; get extra match length nibble
add al,cl ; add len from token to nibble
cmp al,018H ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
jne .got_matchlen ; no, we have the full match length from the token, go copy
lodsb ; grab extra length byte
add al,018H ; overflow?
jnc .got_matchlen ; if not, we have the entire length
je short .done_decompressing ; detect EOD code
lodsw ; grab 16-bit length
.got_matchlen:
xchg cx,ax ; copy match length into cx
push ds ; save ds:si (current pointer to compressed data)
xchg si,ax
push es
pop ds
lea si,[bp+di] ; ds:si now points at back reference in output data
rep movsb ; copy match
xchg si,ax ; restore ds:si
pop ds
jmp .decode_token ; go decode another token
.done_decompressing:
pop ax ; retrieve the original decompression offset
xchg di,ax ; compute decompressed size
sub ax,di
ret ; done
.get_nibble_x:
cmc ; carry set if bit 4 was set
rcr al,1
call .get_nibble ; get nibble for offset bits 0-3
or al,cl ; merge nibble
rol al,1
xor al,0E1H ; set offset bits 7-5 to 1
ret
.get_nibble:
neg bh ; nibble ready?
jns .has_nibble
xchg bx,ax
lodsb ; load two nibbles
xchg bx,ax
.has_nibble:
mov cl,4 ; swap 4 high and low bits of nibble
ror bl,cl
mov cl,0FH
and cl,bl
ret


@ -0,0 +1,236 @@
; decompress_speed_v1.S - time-efficient decompressor implementation for 8088
; NASM syntax.
;
; Usual DOS assembler SMALL model assumptions apply. This code:
; - Assumes it was invoked via NEAR call (change RET to RETF for FAR calls)
; - Is interrupt-safe
; - Is not re-entrant (do not decompress while already running decompression)
; - Trashes all data and segment registers
;
; Copyright (C) 2019 Jim Leonard, Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
segment .text
bits 16
; ---------------------------------------------------------------------------
; Decompress raw LZSA1 block
; inputs:
; * ds:si: raw LZSA1 block
; * es:di: output buffer
; output:
; * ax: decompressed size
; ---------------------------------------------------------------------------
; Must declare this in the code segment:
SHR4table:
DB 00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h
DB 01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h
DB 02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h
DB 03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h
DB 04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h
DB 05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h
DB 06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h
DB 07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h
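; SHR4table[i] = i SHR 4 for i = 0..7Fh; the 'cs xlat' in .got_literals below
; uses it to shift the masked LLL bits into place with a single instruction
; instead of a mov cl,4 / shr al,cl pair.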
lzsa1_decompress_speed:
push di ;remember decompression offset
cld ;ensure string ops move forward
mov bx,SHR4table
xor cx,cx
.decode_token:
xchg cx,ax ;clear ah (cx = 0 from match copy's rep movsb)
lodsb ;read token byte: O|LLL|MMMM
mov dx,ax ;copy our token to dl for later MMMM handling
and al,070H ;isolate literals length in token (LLL)
jz .check_offset_size ;if LLL=0, we have no literals; goto match
cmp al,070H ;LITERALS_RUN_LEN?
jne .got_literals ;no, we have full count from token; go copy
lodsb ;grab extra length byte
add al,07H ;add LITERALS_RUN_LEN
jnc .got_literals_exact ;if no overflow, we have full count
je .big_literals
.mid_literals:
lodsb ;grab single extra length byte
inc ah ;add 256
xchg cx,ax ;with longer counts, we can save some time
shr cx,1 ;by doing a word copy instead of a byte copy.
rep movsw ;We don't need to account for overlap because
adc cx,0 ;source for literals isn't the output buffer.
rep movsb
jmp .check_offset_size
.big_literals:
lodsw ;grab 16-bit extra length
xchg cx,ax ;with longer counts, we can save some time
shr cx,1 ;by doing a word copy instead of a byte copy.
rep movsw
adc cx,0
rep movsb
jmp .check_offset_size
.got_literals:
cs xlat ;shift literals length into place
.got_literals_exact:
xchg cx,ax
rep movsb ;copy cx literals from ds:si to es:di
.check_offset_size:
test dl,dl ;check match offset size in token (O bit)
js .get_long_offset ;load absolute 16-bit match offset
mov ah,0ffh ;set up high byte
lodsb ;load low byte
.get_match_length:
xchg dx,ax ;dx: match offset ax: original token
and al,0FH ;isolate match length in token (MMMM)
cmp al,0FH ;MATCH_RUN_LEN?
jne .got_matchlen_short ;no, we have the full match length from the token, go copy
lodsb ;grab extra length byte
add al,012H ;add MIN_MATCH_SIZE + MATCH_RUN_LEN
jnc .do_long_copy ;if no overflow, we have the entire length
jne .mid_matchlen
lodsw ;grab 16-bit length
xchg cx,ax ;get ready to do a long copy
jcxz .done_decompressing ;wait, is it the EOD marker? Exit if so
jmp .copy_len_preset ;otherwise, do the copy
.get_long_offset:
lodsw ;Get 2-byte match offset
jmp .get_match_length
.got_matchlen_short:
add al,3 ;add MIN_MATCH_SIZE
xchg cx,ax ;copy match length into cx
mov bp,ds ;save ds
mov ax,es
mov ds,ax ;ds=es
xchg ax,si ;save si
mov si,di ;ds:si now points at back reference in output data
add si,dx
rep movsb ;copy match
xchg si,ax ;restore si
mov ds,bp ;restore ds
jmp .decode_token ;go decode another token
.done_decompressing:
pop ax ;retrieve the original decompression offset
xchg di,ax ;compute decompressed size
sub ax,di
ret ;done decompressing, exit to caller
;With a confirmed longer match length, we have an opportunity to optimize for
;the case where a single byte is repeated long enough that we can benefit
;from rep movsw to perform the run (instead of rep movsb).
.mid_matchlen:
lodsb ;grab single extra length byte
inc ah ;add 256
.do_long_copy:
xchg cx,ax ;copy match length into cx
.copy_len_preset:
push ds ;save ds
mov bp,es
mov ds,bp ;ds=es
mov bp,si ;save si
mov si,di ;ds:si now points at back reference in output data
add si,dx
cmp dx,-2 ;do we have a byte/word run to optimize?
jae .do_run ;perform a run
;You may be tempted to change "jae" to "jge" because DX is a signed number.
;Don't! The total window is 64k, so if you treat this as a signed comparison,
;you will get incorrect results for offsets over 32K.
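;For example (offset chosen arbitrarily): a match 40000 bytes back is stored
;in dx as -40000 = 63C0h. Read as a signed value that is +25536, so jge would
;wrongly take the run branch; the unsigned jae correctly falls through to the
;block copy, because 63C0h is below 0FFFEh.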
;If we're here, we have a long copy and it isn't byte-overlapping (if it
;overlapped, we'd be in .do_run_1) So, let's copy faster with REP MOVSW.
;This won't affect 8088 that much, but it speeds up 8086 and higher.
shr cx,1
rep movsw
adc cx,0
rep movsb
mov si,bp ;restore si
pop ds
jmp .decode_token ;go decode another token
.do_run:
je .do_run_2 ;fall through to byte (common) if not word run
.do_run_1:
lodsb ;load first byte of run into al
mov ah,al
shr cx,1
rep stosw ;perform word run
adc cx,0
rep stosb ;finish word run
mov si,bp ;restore si
pop ds
jmp .decode_token ;go decode another token
.do_run_2:
lodsw ;load first word of run
shr cx,1
rep stosw ;perform word run
adc cx,0 ;despite 2-byte offset, compressor might
rep stosb ;output odd length. better safe than sorry.
mov si,bp ;restore si
pop ds
jmp .decode_token ;go decode another token
;Speed optimization history (decompression times in microseconds @ 4.77 MHz):
; original E. Marty code shuttle 123208 alice 65660 robotron 407338 ***
; table for shr al,4 shuttle 120964 alice 63230 robotron 394733 +++
; push/pop to mov/mov shuttle 118176 alice 61835 robotron 386762 +++
; movsw for literalcpys shuttle 124102 alice 64908 robotron 400220 --- rb
; stosw for byte runs shuttle 118897 alice 65040 robotron 403518 --- rb
; better stosw for runs shuttle 117712 alice 65040 robotron 403343 +--
; disable RLE by default shuttle 116924 alice 60783 robotron 381226 +++
; optimize got_matchlen shuttle 115294 alice 59588 robotron 374330 +++
; fall through to getML shuttle 113258 alice 59572 robotron 372004 +++
; fall through to midLI shuttle 113258 alice 59572 robotron 375060 ..- rb
; fall through midMaLen shuttle 113247 alice 59572 robotron 372004 +.+
; movsw for litlen > 255 shuttle 113247 alice 59572 robotron 371612 ..+
; rep stosw for long runs shuttle 113247 alice 59572 robotron 371612 ...
; rep movsw for long cpys shuttle 113247 alice 59572 robotron 371035 ..+
; xchg/dec ah -> mov ah,val shuttle 112575 alice 59272 robotron 369198 +++
; force >12h len.to longcpy shuttle 101998 alice 59266 robotron 364459 +.+
; more efficient run branch shuttle 102239 alice 59297 robotron 364716 --- rb
; even more eff. run branch shuttle 101998 alice 59266 robotron 364459 ***
; BUGFIX - bad sign compare shuttle 101955 alice 59225 robotron 364117 +++
; reverse 16-bit len compar shuttle 102000 alice 59263 robotron 364460 --- rb
; jcxz for EOD detection no change to speed, but is 1 byte shorter +++
; force movsw for literals shuttle 107183 alice 62555 robotron 379524 --- rb
; defer shr4 until necessry shuttle 102069 alice 60236 robotron 364096 ---
; skip literals if LLL=0 shuttle 98655 alice 57849 robotron 363358 ---
; fall through to mid_liter shuttle 98595 alice 57789 robotron 361998 +++
; == jumptable experiments begin ==
; jumptable for small copys shuttle 101594 alice 61078 robotron 386018 ---
; start:xchg instead of mov shuttle 100948 alice 60467 robotron 381112 +++
; use table for LLL=0 check shuttle 106972 alice 63333 robotron 388304 --- rb
; jmptbl to fallthrough mov shuttle 102532 alice 60760 robotron 383070 ---
; cpy fallthrough check_ofs shuttle 98939 alice 58917 robotron 371019 +**
; single jumptable jump shuttle 97528 alice 57264 robotron 362194 ++*
; conditional check for L=7 shuttle 98610 alice 58521 robotron 368153 --- rb
; rip out the jumptable :-/ shuttle 97616 alice 57128 robotron 360697 +++
; defer add MIN_MATCH_SIZE shuttle 97250 alice 57004 robotron 361191 ++?
; cache constants in regs shuttle 104681 alice 59939 robotron 380125 --- rb


@ -0,0 +1,288 @@
; decompress_speed_v2.S - LZSA v2 time-efficient decompressor implementation for 8088
; NASM syntax.
;
; Usual DOS assembler SMALL model assumptions apply. This code:
; - Assumes it was invoked via NEAR call (change RET to RETF for FAR calls)
; - Is interrupt-safe
; - Is not re-entrant (do not decompress while already running decompression)
; - Trashes all data and segment registers
;
; Copyright (C) 2019 Jim Leonard, Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
segment .text
bits 16
;While LZSA2 is technically capable of generating a match offset of -2,
;this sequence never actually showed up in my LZSA2 test corpus, likely due
;to compressor optimizations and the LZSA2 format itself. If you know your
;test data will contain a match offset of -2, you can enable code to write
;out the sequence very quickly at the cost of 18 bytes of code.
HANDLE_WORD_RUN EQU 0
; ---------------------------------------------------------------------------
; Decompress raw LZSA2 block
; inputs:
; * ds:si: raw LZSA2 block
; * es:di: output buffer
; output:
; * ax: decompressed size
; ---------------------------------------------------------------------------
%macro get_nybble 0
neg bh ;nybble ready?
jns %%has_nybble
xchg bx,ax
lodsb ;load two nybbles
xchg bx,ax
%%has_nybble:
mov cl,4 ;swap 4 high and low bits of nybble
ror bl,cl
mov cl,0FH
and cl,bl
%endmacro
lzsa2_decompress_speed:
push di ;remember decompression offset
cld ;make string operations go forward
xor cx,cx
mov bx,0100H ;bx used by get_nybble
.decode_token:
mov ax,cx ;clear ah - cx is zero (and must stay that way)
lodsb ;read token byte: XYZ|LL|MMM
mov dx,ax ;keep copy of token in dl
and al,018H ;isolate literals length in token (LL)
jz .check_offset ;no literals? stop decoding, go to matches
;At this point, al can be in three (unshifted) states: 1, 2, or 3.
;3 = not done yet.
cmp al,(2 << 3) ;LITERALS_RUN_LEN_V2? (original: cmp al,03h)
jb .lit1b ;LZSA2 outputs 1-byte literals more often, so test for that first
je .lit2b
mov cl,3
shr al,cl ;shift literals length into place
get_nybble ;cl := get extra literals length nybble
add al,cl ;add len from token to nybble
cmp al,012H ;LITERALS_RUN_LEN_V2 + 15 ?
jne .got_literals ;if not, we have the full literals count
lodsb ;grab extra length byte
add al,012H ;overflow?
jnc .got_literals_big ;if not, we have a big full literals count
lodsw ;grab 16-bit extra length
;For larger counts, it pays to set up a faster copy
.got_literals_big:
xchg cx,ax
shr cx,1
rep movsw
adc cx,0
rep movsb
jmp .check_offset
.got_literals:
xchg cx,ax
rep movsb ;copy cx literals from ds:si to es:di
jmp .check_offset
;LZSA2 likes to produce tiny literals of 1 or 2 bytes. Handle them here.
.lit2b:movsb
.lit1b:movsb
.check_offset:
test dl,dl ;check match offset mode in token (X bit)
js .rep_match_or_large_offset
cmp dl,040H ;check if this is a 5 or 9-bit offset (Y bit)
jnb .offset_9_bit
;5 bit offset:
xchg cx,ax ;clear ah - cx is zero from prior rep movs
mov al,020H ;shift Z (offset bit 4) in place
and al,dl
shl al,1
shl al,1
get_nybble ;get nybble for offset bits 0-3
or al,cl ;merge nybble
rol al,1
xor al,0E1H ;set offset bits 7-5 to 1
dec ah ;set offset bits 15-8 to 1
jmp .get_match_length
.rep_match_or_16_bit:
test dl,020H ;test bit Z (offset bit 8)
jne .repeat_match ;rep-match
;16 bit offset:
lodsw ;Get 2-byte match offset
xchg ah,al
jmp .get_match_length
.offset_9_bit:
;9 bit offset:
xchg cx,ax ;clear ah - cx is zero from prior rep movs
lodsb ;get 8 bit offset from stream in A
dec ah ;set offset bits 15-8 to 1
test dl,020H ;test bit Z (offset bit 8)
je .get_match_length
dec ah ;Z bit is set: clear offset bit 8
jmp .get_match_length
.rep_match_or_large_offset:
cmp dl,0c0H ;check if this is a 13-bit offset
;or a 16-bit offset/rep match (Y bit)
jnb .rep_match_or_16_bit
;13 bit offset:
mov ah,020H ;shift Z (offset bit 12) in place
and ah,dl
shl ah,1
shl ah,1
get_nybble ;get nybble for offset bits 8-11
or ah,cl ;merge nybble
rol ah,1
xor ah,0E1H ;set offset bits 15-13 to 1
sub ah,2 ;subtract 512
lodsb ;load match offset bits 0-7
.get_match_length:
mov bp,ax ;bp:=offset
.repeat_match:
mov ax,dx ;ax: original token
and al,07H ;isolate match length in token (MMM)
add al,2 ;add MIN_MATCH_SIZE_V2
cmp al,09H ;MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
jne .got_matchlen ;no, we have full match length from token
get_nybble ;get extra match length nybble
add al,cl ;add len from token to nybble
cmp al,018H ;MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
jne .got_matchlen ;no, we have full match length from token
lodsb ;grab extra length byte
add al,018H ;overflow?
jnc .got_matchlen_big ;if not, we have entire (big) length
je .done_decompressing ; detect EOD code
lodsw ;grab 16-bit length
;If we're here, we have a larger match copy and can optimize how we do that
.got_matchlen_big:
xchg cx,ax ;copy match length into cx
mov dx,ds ;save ds
mov ax,es
mov ds,ax ;ds:=es
xchg si,ax ;dx:ax = old ds:si
mov si,di ;ds:si now points at back reference in output data
add si,bp
%if HANDLE_WORD_RUN
cmp bp,-2 ;do we have a byte/word run to optimize?
jae .do_run ;perform a run
%else
cmp bp,-1 ;do we have a byte run to optimize?
je .do_run_1 ;perform a byte run
%endif
;You may be tempted to change "jae" to "jge" because BP is a signed number.
;Don't! The total window is 64k, so if you treat this as a signed comparison,
;you will get incorrect results for offsets over 32K.
;
;If we're here, we have a long copy and it isn't byte-overlapping (if it
;overlapped, we'd be in .do_run_1) So, let's copy faster with REP MOVSW.
;This won't affect 8088 that much, but it speeds up 8086 and higher.
shr cx,1
rep movsw
adc cx,0
rep movsb
xchg si,ax
mov ds,dx ;restore ds:si
jmp .decode_token ;go decode another token
;Smaller match copies handled here:
.got_matchlen:
xchg cx,ax ;copy match length into cx
mov dx,ds ;save ds
mov ax,es
mov ds,ax ;ds:=es
xchg si,ax ;dx:ax = old ds:si
mov si,di ;ds:si = back reference in output data
add si,bp
rep movsb ;copy match
xchg si,ax
mov ds,dx ;restore ds:si
jmp .decode_token ;go decode another token
.done_decompressing:
pop ax ;retrieve the original decompression offset
xchg di,ax ;compute decompressed size
sub ax,di
ret ;done
%if HANDLE_WORD_RUN
.do_run:
je .do_run_2 ;fall through to byte (common) if not word run
%endif
.do_run_1:
push ax
lodsb ;load first byte of run into al
mov ah,al
shr cx,1
rep stosw ;perform word run
adc cx,0
rep stosb ;finish word run
pop si
mov ds,dx
jmp .decode_token ;go decode another token
%if HANDLE_WORD_RUN
.do_run_2:
push ax
lodsw ;load first word of run
shr cx,1
rep stosw ;perform word run
adc cx,0 ;despite 2-byte offset, compressor might
rep stosb ;output odd length. better safe than sorry.
pop si
mov ds,dx
jmp .decode_token ;go decode another token
%endif
;Speed optimization history (decompression times in microseconds @ 4.77 MHz):
;Compression corpus:shuttle alice robotro rletest largetx linewar ...... ..
;Start of exercise 160828 113311 665900 238507 1053865 1004237 ******
;add al,val -> al,cl 160813 113296 668721 237484 1053604 1003815 ++-+++
;sub ah,2 -> dec dec 160907 113585 666744 237484 1056651 1005172 --+*-- rb
;mov ax,cx->xchgcxax 159741 112460 660594 237477 1046770 998323 ++++++
;unroll get_nibble 152552 106327 621119 237345 982381 942373 ++++++
;early exit if LL=0 147242 103842 615559 239318 946863 942932 +++-+-
;push/pop->mov/mov 145447 100832 604822 237288 927017 931366 ++++++
;push/pop->mov/mov(2)143214 98817 592920 239298 908217 910955 +++-++
;rep stos for -1, -2 143289 102812 617087 237164 942081 940688 ---+-- rb
;larger literal cpys 143214 98817 591940 238296 907237 909657 **++++
;larger copys & runs 132440 98802 586551 178768 904129 896709 ++++++ :-)
;smaller lit. copies 131991 99131 583933 177760 901824 898308 +-+++-
;swap smal lit compa 131828 99022 585121 177757 901793 894054 ++-*++
;compare before shif 130587 95970 569908 177753 889221 872461 +++*++
;getmatchlength base 130587 95970 570634 177753 893536 871556 ...... ===
; f->rep_match_or_16 xxxxxx xxxxx 569910 xxxxxx 889266 871435 ..+.++
; f->rep_match_or_la 129966 94748 566169 xxxxxx 880870 867030 +++.++ +++
; f->offset_9_bit 132126 95258 568869 xxxxxx 893169 870364 -++.-+
;final fallthrough 129966 94748 566169 177753 880870 865023 ******


@ -0,0 +1,120 @@
; decompress_small_v1.asm - space-efficient decompressor implementation for x86
;
; Copyright (C) 2019 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
segment .text
bits 32
; ---------------------------------------------------------------------------
; Decompress raw LZSA1 block
; inputs:
; * esi: raw LZSA1 block
; * edi: output buffer
; output:
; * eax: decompressed size
; ---------------------------------------------------------------------------
%ifndef BIN
global lzsa1_decompress
global _lzsa1_decompress
%endif
lzsa1_decompress:
_lzsa1_decompress:
pushad
;mov edi, [esp+32+4] ; edi = outbuf
;mov esi, [esp+32+8] ; esi = inbuf
xor ecx, ecx
.decode_token:
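; ecx is always 0 when we arrive here, so the 'mul ecx' below clears both
; eax and edx in one 2-byte instruction (edx:eax = eax * 0)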
mul ecx
lodsb ; read token byte: O|LLL|MMMM
mov dl, al ; keep token in dl
and al, 070H ; isolate literals length in token (LLL)
shr al, 4 ; shift literals length into place
cmp al, 07H ; LITERALS_RUN_LEN?
jne .got_literals ; no, we have the full literals count from the token, go copy
lodsb ; grab extra length byte
add al, 07H ; add LITERALS_RUN_LEN
jnc .got_literals ; if no overflow, we have the full literals count, go copy
jne .mid_literals
lodsw ; grab 16-bit extra length
jmp .got_literals
.mid_literals:
lodsb ; grab single extra length byte
inc ah ; add 256
.got_literals:
xchg ecx, eax
rep movsb ; copy cx literals from ds:si to es:di
test dl, dl ; check match offset size in token (O bit)
js .get_long_offset
dec ecx
xchg eax, ecx ; clear ah - cx is zero from the rep movsb above
lodsb
jmp .get_match_length
.get_long_offset:
lodsw ; Get 2-byte match offset
.get_match_length:
xchg eax, edx ; edx: match offset eax: original token
and al, 0FH ; isolate match length in token (MMMM)
add al, 3 ; add MIN_MATCH_SIZE
cmp al, 012H ; MATCH_RUN_LEN?
jne .got_matchlen ; no, we have the full match length from the token, go copy
lodsb ; grab extra length byte
add al,012H ; add MIN_MATCH_SIZE + MATCH_RUN_LEN
jnc .got_matchlen ; if no overflow, we have the entire length
jne .mid_matchlen
lodsw ; grab 16-bit length
test eax, eax ; bail if we hit EOD
je .done_decompressing
jmp .got_matchlen
.mid_matchlen:
lodsb ; grab single extra length byte
inc ah ; add 256
.got_matchlen:
xchg ecx, eax ; copy match length into ecx
xchg esi, eax
mov esi, edi ; esi now points at back reference in output data
movsx edx, dx ; sign-extend dx to 32-bits.
add esi, edx
rep movsb ; copy match
xchg esi, eax ; restore esi
jmp .decode_token ; go decode another token
.done_decompressing:
sub edi, [esp+32+4]
mov [esp+28], edi ; eax = decompressed size
popad
ret ; done
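Since this routine takes its arguments in esi/edi and returns the size in eax (the stack-argument loads are commented out), a C caller needs a small shim. The following is a hypothetical sketch for a 32-bit GCC build, not part of the repository; it only assumes the register convention documented in the header comment and that the assembled symbol lzsa1_decompress is linked in.

/* Hypothetical shim, not from this repository: calls the register-based  */
/* 32-bit entry point above from C (GCC, -m32).                           */
/* esi = raw LZSA1 block, edi = output buffer, eax = decompressed size.   */
static int lzsa1_decompress_from_c(const void *pCompressedData, void *pOutBuffer)
{
   int nDecompressedSize;
   __asm__ volatile ("call lzsa1_decompress"
                     : "=a" (nDecompressedSize)   /* eax: decompressed size */
                     : "S" (pCompressedData),     /* esi: raw LZSA1 block   */
                       "D" (pOutBuffer)           /* edi: output buffer     */
                     : "memory", "cc");           /* pushad/popad preserves the other registers */
   return nDecompressedSize;
}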


@ -0,0 +1,181 @@
; decompress_small_v2.asm - space-efficient decompressor implementation for x86
;
; Copyright (C) 2019 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
segment .text
bits 32
; ---------------------------------------------------------------------------
; Decompress raw LZSA2 block
; inputs:
; * esi: raw LZSA2 block
; * edi: output buffer
; output:
; * eax: decompressed size
; ---------------------------------------------------------------------------
%ifndef BIN
global lzsa2_decompress
global _lzsa2_decompress
%endif
lzsa2_decompress:
_lzsa2_decompress:
pushad
;mov edi, [esp+32+4] ; edi = outbuf
;mov esi, [esp+32+8] ; esi = inbuf
xor ecx, ecx
xor ebx, ebx ; ebx = 0100H
inc bh
xor ebp, ebp
.decode_token:
mul ecx ; ecx is 0 here, so this clears eax (and edx)
lodsb ; read token byte: XYZ|LL|MMM
mov dl, al ; keep token in dl
and al, 018H ; isolate literals length in token (LL)
shr al, 3 ; shift literals length into place
cmp al, 03H ; LITERALS_RUN_LEN_V2?
jne .got_literals ; no, we have the full literals count from the token, go copy
call .get_nibble ; get extra literals length nibble
add al, cl ; add len from token to nibble
cmp al, 012H ; LITERALS_RUN_LEN_V2 + 15 ?
jne .got_literals ; if not, we have the full literals count, go copy
lodsb ; grab extra length byte
add al,012H ; overflow?
jnc .got_literals ; if not, we have the full literals count, go copy
lodsw ; grab 16-bit extra length
.got_literals:
xchg ecx, eax
rep movsb ; copy ecx literals from esi to edi
test dl, 0C0h ; check match offset mode in token (X bit)
js .rep_match_or_large_offset
;;cmp dl,040H ; check if this is a 5 or 9-bit offset (Y bit)
; discovered via the test with bit 6 set
xchg ecx, eax ; clear ah - cx is zero from the rep movsb above
jne .offset_9_bit
; 5 bit offset
cmp dl, 020H ; test bit 5
call .get_nibble_x
jmp .dec_offset_top
.offset_9_bit: ; 9 bit offset
lodsb ; get 8 bit offset from stream in A
dec ah ; set offset bits 15-8 to 1
test dl, 020H ; test bit Z (offset bit 8)
je .get_match_length
.dec_offset_top:
dec ah ; clear bit 8 if Z bit is clear
; or set offset bits 15-8 to 1
jmp .get_match_length
.rep_match_or_large_offset:
;;cmp dl,0c0H ; check if this is a 13-bit offset or a 16-bit offset/rep match (Y bit)
jpe .rep_match_or_16_bit
; 13 bit offset
cmp dl, 0A0H ; test bit 5 (knowing that bit 7 is also set)
xchg ah, al
call .get_nibble_x
sub al, 2 ; subtract 512
jmp .get_match_length_1
.rep_match_or_16_bit:
test dl, 020H ; test bit Z (offset bit 8)
jne .repeat_match ; rep-match
; 16 bit offset
lodsb ; get high byte of the 16-bit match offset
.get_match_length_1:
xchg ah, al
lodsb ; load match offset bits 0-7
.get_match_length:
xchg ebp, eax ; ebp: offset
.repeat_match:
xchg eax, edx ; ax: original token
and al, 07H ; isolate match length in token (MMM)
add al, 2 ; add MIN_MATCH_SIZE_V2
cmp al, 09H ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
jne .got_matchlen ; no, we have the full match length from the token, go copy
call .get_nibble ; get extra match length nibble
add al, cl ; add len from token to nibble
cmp al, 018H ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
jne .got_matchlen ; no, we have the full match length from the token, go copy
lodsb ; grab extra length byte
add al,018H ; overflow?
jnc .got_matchlen ; if not, we have the entire length
je .done_decompressing ; detect EOD code
lodsw ; grab 16-bit length
.got_matchlen:
xchg ecx, eax ; copy match length into ecx
xchg esi, eax
movsx ebp, bp ; sign-extend bp to 32-bits
lea esi,[ebp+edi] ; esi now points at back reference in output data
rep movsb ; copy match
xchg esi, eax ; restore esi
jmp .decode_token ; go decode another token
.done_decompressing:
sub edi, [esp+32+4]
mov [esp+28], edi
popad
ret ; done
.get_nibble_x:
cmc ; carry set if bit 4 was set
rcr al, 1
call .get_nibble ; get nibble for offset bits 0-3
or al, cl ; merge nibble
rol al, 1
xor al, 0E1H ; set offset bits 7-5 to 1
ret
.get_nibble:
neg bh ; nibble ready?
jns .has_nibble
xchg ebx, eax
lodsb ; load two nibbles
xchg ebx, eax
.has_nibble:
mov cl, 4 ; swap 4 high and low bits of nibble
ror bl, cl
mov cl, 0FH
and cl, bl
ret
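The .get_nibble / .get_nibble_x helpers above are fairly dense; as a plain C model of the same nibble stream (two nibbles packed per byte, high nibble consumed first), here is an illustrative sketch that is not part of the source:

/* Illustrative C model of the LZSA2 nibble stream read by .get_nibble above. */
typedef struct {
   const unsigned char *pIn;  /* read pointer into the compressed stream       */
   int nHaveLowNibble;        /* non-zero while the low nibble is still queued */
   unsigned char nNibbles;    /* byte currently being split into two nibbles   */
} nibble_reader_t;

static unsigned int read_nibble(nibble_reader_t *pReader)
{
   if (!pReader->nHaveLowNibble) {
      pReader->nNibbles = *pReader->pIn++;   /* fetch two nibbles at once     */
      pReader->nHaveLowNibble = 1;
      return pReader->nNibbles >> 4;         /* high nibble is returned first */
   }
   pReader->nHaveLowNibble = 0;
   return pReader->nNibbles & 0x0f;          /* then the low one, no new read */
}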

201  asm/z80/unlzsa1_fast.asm  Normal file
@ -0,0 +1,201 @@
;
; Speed-optimized LZSA1 decompressor by spke & uniabis (109 bytes)
;
; ver.00 by spke for LZSA 0.5.4 (03-24/04/2019, 134 bytes);
; ver.01 by spke for LZSA 0.5.6 (25/04/2019, 110(-24) bytes, +0.2% speed);
; ver.02 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
; ver.03 by uniabis (30/07/2019, 109(-1) bytes, +3.5% speed);
; ver.04 by spke (31/07/2019, small re-organization of macros);
; ver.05 by uniabis (22/08/2019, 107(-2) bytes, same speed);
; ver.06 by spke for LZSA 1.0.7 (27/08/2019, 111(+4) bytes, +2.1% speed);
; ver.07 by spke for LZSA 1.1.0 (25/09/2019, added full revision history);
; ver.08 by spke for LZSA 1.1.2 (22/10/2019, re-organized macros and added an option for unrolled copying of long matches);
; ver.09 by spke for LZSA 1.2.1 (02/01/2020, 109(-2) bytes, same speed)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
;
; lzsa.exe -f1 -r <sourcefile> <outfile>
;
; where option -r asks for the generation of raw (frame-less) data.
;
; The decompression is done in the standard way:
;
; ld hl,FirstByteOfCompressedData
; ld de,FirstByteOfMemoryForDecompressedData
; call DecompressLZSA1
;
; Backward compression is also supported; you can compress files backward using:
;
; lzsa.exe -f1 -r -b <sourcefile> <outfile>
;
; and decompress the resulting files using:
;
; ld hl,LastByteOfCompressedData
; ld de,LastByteOfMemoryForDecompressedData
; call DecompressLZSA1
;
; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
;
; Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; DEFINE UNROLL_LONG_MATCHES ; uncomment for faster decompression of very compressible data (+57 bytes)
; DEFINE BACKWARD_DECOMPRESS
IFNDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
inc hl
ENDM
MACRO ADD_OFFSET
ex de,hl : add hl,de
ENDM
MACRO COPY1
ldi
ENDM
MACRO COPYBC
ldir
ENDM
ELSE
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
ex de,hl : ld a,e : sub l : ld l,a
ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes
ENDM
MACRO COPY1
ldd
ENDM
MACRO COPYBC
lddr
ENDM
ENDIF
@DecompressLZSA1:
ld b,0 : jr ReadToken
NoLiterals: xor (hl) : NEXT_HL : jp m,LongOffset
ShortOffset: push de : ld e,(hl) : ld d,#FF
; short matches have length 0+3..14+3
add 3 : cp 15+3 : jr nc,LongerMatch
; placed here this saves a JP per iteration
CopyMatch: ld c,a
.UseC NEXT_HL : ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src]
COPY1 : COPY1 : COPYBC ; BC = 0, DE = dest
.popSrc pop hl ; HL = src
ReadToken: ; first a byte token "O|LLL|MMMM" is read from the stream,
; where LLL is the number of literals and MMMM is
; a length of the match that follows after the literals
ld a,(hl) : and #70 : jr z,NoLiterals
cp #70 : jr z,MoreLiterals ; LLL=7 means 7+ literals...
rrca : rrca : rrca : rrca : ld c,a ; LLL<7 means 0..6 literals...
ld a,(hl) : NEXT_HL
COPYBC
; the top bit of token is set if the offset contains two bytes
and #8F : jp p,ShortOffset
LongOffset: ; read second byte of the offset
push de : ld e,(hl) : NEXT_HL : ld d,(hl)
add -128+3 : cp 15+3 : jp c,CopyMatch
IFNDEF UNROLL_LONG_MATCHES
; MMMM=15 indicates a multi-byte match length
LongerMatch: NEXT_HL : add (hl) : jr nc,CopyMatch
; the codes are designed to overflow;
; the overflow value 1 means read 1 extra byte
; and overflow value 0 means read 2 extra bytes
.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,CopyMatch.UseC
.code0 NEXT_HL : ld b,(hl)
; the two-byte match length equal to zero
; designates the end-of-data marker
ld a,b : or c : jr nz,CopyMatch.UseC
pop de : ret
ELSE
; MMMM=15 indicates a multi-byte match length
LongerMatch: NEXT_HL : add (hl) : jr c,VeryLongMatch
ld c,a
.UseC NEXT_HL : ex (sp),hl
ADD_OFFSET
COPY1 : COPY1
; this is an unrolled equivalent of LDIR
xor a : sub c
and 16-1 : add a
ld (.jrOffset),a : jr nz,$+2
.jrOffset EQU $-1
.fastLDIR DUP 16
COPY1
EDUP
jp pe,.fastLDIR
jp CopyMatch.popSrc
VeryLongMatch: ; the codes are designed to overflow;
; the overflow value 1 means read 1 extra byte
; and overflow value 0 means read 2 extra bytes
.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,LongerMatch.UseC
.code0 NEXT_HL : ld b,(hl)
; the two-byte match length equal to zero
; designates the end-of-data marker
ld a,b : or c : jr nz,LongerMatch.UseC
pop de : ret
ENDIF
MoreLiterals: ; there are three possible situations here
xor (hl) : NEXT_HL : exa
ld a,7 : add (hl) : jr c,ManyLiterals
CopyLiterals: ld c,a
.UseC NEXT_HL : COPYBC
exa : jp p,ShortOffset : jr LongOffset
ManyLiterals:
.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,CopyLiterals.UseC
.code0 NEXT_HL : ld b,(hl) : jr CopyLiterals.UseC

135  asm/z80/unlzsa1_small.asm  Normal file
@ -0,0 +1,135 @@
;
; Size-optimized LZSA1 decompressor by spke & uniabis (67 bytes)
;
; ver.00 by spke for LZSA 0.5.4 (23/04/2019, 69 bytes);
; ver.01 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
; ver.02 by uniabis (30/07/2019, 68(-1) bytes, +3.2% speed);
; ver.03 by spke for LZSA 1.0.7 (31/07/2019, small re-organization of macros);
; ver.04 by spke (06/08/2019, 67(-1) bytes, -1.2% speed);
; ver.05 by spke for LZSA 1.1.0 (25/09/2019, added full revision history)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
;
; lzsa.exe -f1 -r <sourcefile> <outfile>
;
; where option -r asks for the generation of raw (frame-less) data.
;
; The decompression is done in the standard way:
;
; ld hl,FirstByteOfCompressedData
; ld de,FirstByteOfMemoryForDecompressedData
; call DecompressLZSA1
;
; Backward compression is also supported; you can compress files backward using:
;
; lzsa.exe -f1 -r -b <sourcefile> <outfile>
;
; and decompress the resulting files using:
;
; ld hl,LastByteOfCompressedData
; ld de,LastByteOfMemoryForDecompressedData
; call DecompressLZSA1
;
; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
;
; Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; DEFINE BACKWARD_DECOMPRESS
IFNDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
inc hl
ENDM
MACRO ADD_OFFSET
ex de,hl : add hl,de
ENDM
MACRO BLOCKCOPY
ldir
ENDM
ELSE
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
push hl : or a : sbc hl,de : pop de ; 11+4+15+10 = 40t / 5 bytes
ENDM
MACRO BLOCKCOPY
lddr
ENDM
ENDIF
@DecompressLZSA1:
ld b,0
; first a byte token "O|LLL|MMMM" is read from the stream,
; where LLL is the number of literals and MMMM is
; a length of the match that follows after the literals
ReadToken: ld a,(hl) : NEXT_HL : push af
and #70 : jr z,NoLiterals
rrca : rrca : rrca : rrca ; LLL<7 means 0..6 literals...
cp #07 : call z,ReadLongBA ; LLL=7 means 7+ literals...
ld c,a : BLOCKCOPY
; next we read the low byte of the -offset
NoLiterals: pop af : push de : ld e,(hl) : NEXT_HL : ld d,#FF
; the top bit of token is set if
; the offset contains the high byte as well
or a : jp p,ShortOffset
LongOffset: ld d,(hl) : NEXT_HL
; last but not least, the match length is read
ShortOffset: and #0F : add 3 ; MMMM<15 means match lengths 0+3..14+3
cp 15+3 : call z,ReadLongBA ; MMMM=15 means lengths 14+3+
ld c,a
ex (sp),hl ; BC = len, DE = -offset, HL = dest, SP -> [src]
ADD_OFFSET ; BC = len, DE = dest, HL = dest+(-offset), SP -> [src]
BLOCKCOPY ; BC = 0, DE = dest
pop hl : jr ReadToken ; HL = src
; a standard routine to read extended codes
; into registers B (higher byte) and A (lower byte).
ReadLongBA: add (hl) : NEXT_HL : ret nc
; the codes are designed to overflow;
; the overflow value 1 means read 1 extra byte
; and overflow value 0 means read 2 extra bytes
.code1: ld b,a : ld a,(hl) : NEXT_HL : ret nz
.code0: ld c,a : ld b,(hl) : NEXT_HL
; the two-byte match length equal to zero
; designates the end-of-data marker
or b : ld a,c : ret nz
pop de : pop de : ret

281  asm/z80/unlzsa2_fast.asm  Normal file
@ -0,0 +1,281 @@
;
; Speed-optimized LZSA2 decompressor by spke & uniabis (216 bytes)
;
; ver.00 by spke for LZSA 1.0.0 (02-07/06/2019, 218 bytes);
; ver.01 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
; ver.02 by spke for LZSA 1.0.6 (27/07/2019, fixed a bug in the backward decompressor);
; ver.03 by uniabis (30/07/2019, 213(-5) bytes, +3.8% speed and support for Hitachi HD64180);
; ver.04 by spke for LZSA 1.0.7 (01/08/2019, 214(+1) bytes, +0.2% speed and small re-organization of macros);
; ver.05 by spke (27/08/2019, 216(+2) bytes, +1.1% speed);
; ver.06 by spke for LZSA 1.1.0 (26/09/2019, added full revision history);
; ver.07 by spke for LZSA 1.1.1 (10/10/2019, +0.2% speed and an option for unrolled copying of long matches)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
;
; lzsa.exe -f2 -r <sourcefile> <outfile>
;
; where option -r asks for the generation of raw (frame-less) data.
;
; The decompression is done in the standard way:
;
; ld hl,FirstByteOfCompressedData
; ld de,FirstByteOfMemoryForDecompressedData
; call DecompressLZSA2
;
; Backward compression is also supported; you can compress files backward using:
;
; lzsa.exe -f2 -r -b <sourcefile> <outfile>
;
; and decompress the resulting files using:
;
; ld hl,LastByteOfCompressedData
; ld de,LastByteOfMemoryForDecompressedData
; call DecompressLZSA2
;
; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
;
; Of course, LZSA2 compression algorithms are (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; DEFINE UNROLL_LONG_MATCHES ; uncomment for faster decompression of very compressible data (+38 bytes)
; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b
; DEFINE HD64180 ; uncomment for systems using Hitachi HD64180
IFNDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
inc hl
ENDM
MACRO ADD_OFFSET
ex de,hl : add hl,de
ENDM
MACRO COPY1
ldi
ENDM
MACRO COPYBC
ldir
ENDM
ELSE
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
ex de,hl : ld a,e : sub l : ld l,a
ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes
ENDM
MACRO COPY1
ldd
ENDM
MACRO COPYBC
lddr
ENDM
ENDIF
IFNDEF HD64180
MACRO LD_IX_DE
ld ixl,e : ld ixh,d
ENDM
MACRO LD_DE_IX
ld e,ixl : ld d,ixh
ENDM
ELSE
MACRO LD_IX_DE
push de : pop ix
ENDM
MACRO LD_DE_IX
push ix : pop de
ENDM
ENDIF
@DecompressLZSA2:
; A' stores next nibble as %1111.... or assumed to contain trash
; B is assumed to be 0
ld b,0 : scf : exa : jr ReadToken
ManyLiterals: ld a,18 : add (hl) : NEXT_HL : jr nc,CopyLiterals
ld c,(hl) : NEXT_HL
ld a,b : ld b,(hl)
jr ReadToken.NEXTHLuseBC
MoreLiterals: ld b,(hl) : NEXT_HL
scf : exa : jr nc,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
rrca : rrca : rrca : rrca
.noUpdate ;sub #F0-3 : cp 15+3 : jr z,ManyLiterals
inc a : jr z,ManyLiterals : sub #F0-3+1
CopyLiterals: ld c,a : ld a,b : ld b,0
COPYBC
push de : or a : jp p,CASE0xx ;: jr CASE1xx
cp %11000000 : jr c,CASE10x
CASE11x cp %11100000 : jr c,CASE110
; "111": repeated offset
CASE111: LD_DE_IX : jr MatchLen
Literals0011: jr nz,MoreLiterals
; if "LL" of the byte token is equal to 0,
; there are no literals to copy
NoLiterals: or (hl) : NEXT_HL
push de : jp m,CASE1xx
; short (5 or 9 bit long) offsets
CASE0xx ld d,#FF : cp %01000000 : jr c,CASE00x
; "01x": the case of the 9-bit offset
CASE01x: cp %01100000 : rl d
ReadOffsetE ld e,(hl) : NEXT_HL
SaveOffset: LD_IX_DE
MatchLen: inc a : and %00000111 : jr z,LongerMatch : inc a
CopyMatch: ld c,a
.useC ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src]
COPY1
COPYBC
.popSrc pop hl
; compressed data stream contains records
; each record begins with the byte token "XYZ|LL|MMM"
ReadToken: ld a,(hl) : and %00011000 : jp pe,Literals0011 ; process the cases 00 and 11 separately
rrca : rrca : rrca
ld c,a : ld a,(hl) ; token is re-read for further processing
.NEXTHLuseBC NEXT_HL
COPYBC
; the token and literals are followed by the offset
push de : or a : jp p,CASE0xx
CASE1xx cp %11000000 : jr nc,CASE11x
; "10x": the case of the 13-bit offset
CASE10x: ld c,a : exa : jr nc,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
rrca : rrca : rrca : rrca
.noUpdate ld d,a : ld a,c
cp %10100000 : dec d : rl d : jr ReadOffsetE
; "110": 16-bit offset
CASE110: ld d,(hl) : NEXT_HL : jr ReadOffsetE
; "00x": the case of the 5-bit offset
CASE00x: ld c,a : exa : jr nc,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
rrca : rrca : rrca : rrca
.noUpdate ld e,a : ld a,c
cp %00100000 : rl e : jp SaveOffset
LongerMatch: scf : exa : jr nc,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
rrca : rrca : rrca : rrca
.noUpdate sub #F0-9 : cp 15+9 : jr c,CopyMatch
IFNDEF UNROLL_LONG_MATCHES
LongMatch: add (hl) : NEXT_HL : jr nc,CopyMatch
ld c,(hl) : NEXT_HL
ld b,(hl) : NEXT_HL : jr nz,CopyMatch.useC
pop de : ret
ELSE
LongMatch: add (hl) : NEXT_HL : jr c,VeryLongMatch
ld c,a
.useC ex (sp),hl
ADD_OFFSET
COPY1
; this is an unrolled equivalent of LDIR
xor a : sub c
and 8-1 : add a
ld (.jrOffset),a : jr nz,$+2
.jrOffset EQU $-1
.fastLDIR DUP 8
COPY1
EDUP
jp pe,.fastLDIR
jp CopyMatch.popSrc
VeryLongMatch: ld c,(hl) : NEXT_HL
ld b,(hl) : NEXT_HL : jr nz,LongMatch.useC
pop de : ret
ENDIF

187  asm/z80/unlzsa2_small.asm  Normal file
@ -0,0 +1,187 @@
;
; Size-optimized LZSA2 decompressor by spke & uniabis (139 bytes)
;
; ver.00 by spke for LZSA 1.0.0 (02-09/06/2019, 145 bytes);
; ver.01 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
; ver.02 by uniabis (30/07/2019, 144(-1) bytes, +3.3% speed and support for Hitachi HD64180);
; ver.03 by spke for LZSA 1.0.7 (01/08/2019, 140(-4) bytes, -1.4% speed and small re-organization of macros);
; ver.04 by spke for LZSA 1.1.0 (26/09/2019, removed usage of IY, added full revision history)
; ver.05 by spke for LZSA 1.1.1 (11/10/2019, 139(-1) bytes, +0.1% speed)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
;
; lzsa.exe -f2 -r <sourcefile> <outfile>
;
; where option -r asks for the generation of raw (frame-less) data.
;
; The decompression is done in the standard way:
;
; ld hl,FirstByteOfCompressedData
; ld de,FirstByteOfMemoryForDecompressedData
; call DecompressLZSA2
;
; Backward compression is also supported; you can compress files backward using:
;
; lzsa.exe -f2 -r -b <sourcefile> <outfile>
;
; and decompress the resulting files using:
;
; ld hl,LastByteOfCompressedData
; ld de,LastByteOfMemoryForDecompressedData
; call DecompressLZSA2
;
; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
;
; Of course, LZSA2 compression algorithms are (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
;
; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b
; DEFINE HD64180 ; uncomment for systems using Hitachi HD64180
IFNDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
inc hl
ENDM
MACRO ADD_OFFSET
ex de,hl : add hl,de
ENDM
MACRO BLOCKCOPY
ldir
ENDM
ELSE
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
push hl : or a : sbc hl,de : pop de ; 11+4+15+10 = 40t / 5 bytes
ENDM
MACRO BLOCKCOPY
lddr
ENDM
ENDIF
IFNDEF HD64180
MACRO LD_IX_DE
ld ixl,e : ld ixh,d
ENDM
MACRO LD_DE_IX
ld e,ixl : ld d,ixh
ENDM
ELSE
MACRO LD_IX_DE
push de : pop ix
ENDM
MACRO LD_DE_IX
push ix : pop de
ENDM
ENDIF
@DecompressLZSA2:
xor a : ld b,a : exa : jr ReadToken
CASE00x: call ReadNibble
ld e,a : ld a,c
cp %00100000 : rl e : jr SaveOffset
CASE0xx ld d,#FF : cp %01000000 : jr c,CASE00x
CASE01x: cp %01100000 : rl d
OffsetReadE: ld e,(hl) : NEXT_HL
SaveOffset: LD_IX_DE
MatchLen: and %00000111 : add 2 : cp 9 : call z,ExtendedCode
CopyMatch: ld c,a
ex (sp),hl ; BC = len, DE = -offset, HL = dest, SP -> [src]
ADD_OFFSET ; BC = len, DE = dest, HL = dest+(-offset), SP -> [src]
BLOCKCOPY ; BC = 0, DE = dest
pop hl ; HL = src
ReadToken: ld a,(hl) : NEXT_HL : push af
and %00011000 : jr z,NoLiterals
rrca : rrca : rrca
call pe,ExtendedCode
ld c,a
BLOCKCOPY
NoLiterals: pop af : push de
or a : jp p,CASE0xx
CASE1xx cp %11000000 : jr nc,CASE11x
CASE10x: call ReadNibble
ld d,a : ld a,c
cp %10100000 ;: rl d
dec d : rl d : DB #CA ; jr OffsetReadE ; #CA is JP Z,.. to skip all commands in CASE110 before jr OffsetReadE
CASE110: ld d,(hl) : NEXT_HL : jr OffsetReadE
CASE11x cp %11100000 : jr c,CASE110
CASE111: LD_DE_IX : jr MatchLen
ExtendedCode: call ReadNibble : inc a : jr z,ExtraByte
sub #F0+1 : add c : ret
ExtraByte ld a,15 : add c : add (hl) : NEXT_HL : ret nc
ld a,(hl) : NEXT_HL
ld b,(hl) : NEXT_HL : ret nz
pop de : pop de ; RET is not needed, because RET from ReadNibble is sufficient
ReadNibble: ld c,a : xor a : exa : ret m
UpdateNibble ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
rrca : rrca : rrca : rrca : ret

BIN  pareto_graph.png  Normal file  (binary image, 37 KiB; not shown)
101  src/dictionary.c  Normal file
@ -0,0 +1,101 @@
/*
* dictionary.c - dictionary implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "format.h"
#include "lib.h"
/**
* Load dictionary contents
*
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param ppDictionaryData pointer to returned dictionary contents, or NULL for none
* @param pDictionaryDataSize pointer to returned size of dictionary contents, or 0
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize) {
unsigned char *pDictionaryData = NULL;
int nDictionaryDataSize = 0;
if (pszDictionaryFilename) {
pDictionaryData = (unsigned char *)malloc(BLOCK_SIZE);
if (!pDictionaryData) {
return LZSA_ERROR_MEMORY;
}
FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb");
if (!pDictionaryFile) {
free(pDictionaryData);
pDictionaryData = NULL;
return LZSA_ERROR_DICTIONARY;
}
fseek(pDictionaryFile, 0, SEEK_END);
#ifdef _WIN32
__int64 nDictionaryFileSize = _ftelli64(pDictionaryFile);
#else
off_t nDictionaryFileSize = ftello(pDictionaryFile);
#endif
if (nDictionaryFileSize > BLOCK_SIZE) {
/* Use the last BLOCK_SIZE bytes of the dictionary */
fseek(pDictionaryFile, -BLOCK_SIZE, SEEK_END);
}
else {
fseek(pDictionaryFile, 0, SEEK_SET);
}
nDictionaryDataSize = (int)fread(pDictionaryData, 1, BLOCK_SIZE, pDictionaryFile);
if (nDictionaryDataSize < 0)
nDictionaryDataSize = 0;
fclose(pDictionaryFile);
pDictionaryFile = NULL;
}
*ppDictionaryData = pDictionaryData;
*pDictionaryDataSize = nDictionaryDataSize;
return LZSA_OK;
}
/**
* Free dictionary contents
*
* @param ppDictionaryData pointer to pointer to dictionary contents
*/
void lzsa_dictionary_free(void **ppDictionaryData) {
if (*ppDictionaryData) {
free(*ppDictionaryData);
*ppDictionaryData = NULL;
}
}
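For reference, a minimal caller of the two helpers above could look like the sketch below; this is illustrative only (LZSA_OK and the prototypes come from the headers in this change, the helper name demo_load_dictionary is made up):

#include <stdio.h>
#include "dictionary.h"
#include "lib.h"   /* assumed to provide LZSA_OK and the lzsa_status_t error codes */

static int demo_load_dictionary(const char *pszDictionaryFilename) {
   void *pDictionaryData = NULL;
   int nDictionaryDataSize = 0;

   if (lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize) != LZSA_OK) {
      fprintf(stderr, "cannot read dictionary '%s'\n", pszDictionaryFilename);
      return -1;
   }
   fprintf(stdout, "loaded %d dictionary byte(s)\n", nDictionaryDataSize);
   /* ... hand pDictionaryData / nDictionaryDataSize to the compressor or decompressor ... */
   lzsa_dictionary_free(&pDictionaryData);   /* frees the buffer and resets the pointer to NULL */
   return 0;
}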

64  src/dictionary.h  Normal file
@ -0,0 +1,64 @@
/*
* dictionary.h - dictionary definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _DICTIONARY_H
#define _DICTIONARY_H
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Load dictionary contents
*
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param ppDictionaryData pointer to returned dictionary contents, or NULL for none
* @param pDictionaryDataSize pointer to returned size of dictionary contents, or 0
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize);
/**
* Free dictionary contents
*
* @param ppDictionaryData pointer to pointer to dictionary contents
*/
void lzsa_dictionary_free(void **ppDictionaryData);
#ifdef __cplusplus
}
#endif
#endif /* _DICTIONARY_H */

239  src/expand.c  (deleted)
@ -1,239 +0,0 @@
/*
* expand.c - block decompressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "format.h"
#include "expand.h"
#ifdef _MSC_VER
#define FORCE_INLINE __forceinline
#else /* _MSC_VER */
#define FORCE_INLINE __attribute__((always_inline))
#endif /* _MSC_VER */
static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int nLiterals, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) {
const unsigned char *pInBlock = *ppInBlock;
unsigned char *pCurOutData = *ppCurOutData;
if (nLiterals == LITERALS_RUN_LEN) {
unsigned char nByte;
if (pInBlock >= pInBlockEnd) return -1;
nByte = *pInBlock++;
nLiterals += (int)((unsigned int)nByte);
if (nByte == 254) {
if (pInBlock >= pInBlockEnd) return -1;
nLiterals += (int)((unsigned int)*pInBlock++);
}
else if (nByte == 255) {
if ((pInBlock + 1) >= pInBlockEnd) return -1;
nLiterals = ((unsigned int)*pInBlock++);
nLiterals |= (((unsigned int)*pInBlock++) << 8);
}
}
if (nLiterals != 0) {
if ((pInBlock + nLiterals) > pInBlockEnd ||
(pCurOutData + nLiterals) > pOutDataEnd) {
return -1;
}
memcpy(pCurOutData, pInBlock, nLiterals);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
*ppInBlock = pInBlock;
*ppCurOutData = pCurOutData;
return 0;
}
static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, int nMatchLen, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) {
const unsigned char *pInBlock = *ppInBlock;
unsigned char *pCurOutData = *ppCurOutData;
if (nMatchLen == MATCH_RUN_LEN) {
unsigned char nByte;
if (pInBlock >= pInBlockEnd) return -1;
nByte = *pInBlock++;
nMatchLen += (int)((unsigned int)nByte);
if (nByte == 254) {
if (pInBlock >= pInBlockEnd) return -1;
nMatchLen += (int)((unsigned int)*pInBlock++);
}
else if (nByte == 255) {
if ((pInBlock + 1) >= pInBlockEnd) return -1;
nMatchLen = ((unsigned int)*pInBlock++);
nMatchLen |= (((unsigned int)*pInBlock++) << 8);
}
}
nMatchLen += MIN_MATCH_SIZE;
if ((pCurOutData + nMatchLen) > pOutDataEnd) {
return -1;
}
if ((pSrc + 1) == pCurOutData && nMatchLen >= 16) {
/* One-byte RLE */
memset(pCurOutData, *pSrc, nMatchLen);
pCurOutData += nMatchLen;
}
else {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
int nMaxFast = nMatchLen;
if (nMaxFast > (pCurOutData - pSrc))
nMaxFast = (int)(pCurOutData - pSrc);
if ((pCurOutData + nMaxFast) > (pOutDataFastEnd - 15))
nMaxFast = (int)(pOutDataFastEnd - 15 - pCurOutData);
if (nMaxFast > 0) {
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMaxFast;
do {
memcpy(pCopyDst, pCopySrc, 16);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
pCurOutData += nMaxFast;
pSrc += nMaxFast;
nMatchLen -= nMaxFast;
}
while (nMatchLen >= 4) {
*pCurOutData++ = *pSrc++;
*pCurOutData++ = *pSrc++;
*pCurOutData++ = *pSrc++;
*pCurOutData++ = *pSrc++;
nMatchLen -= 4;
}
while (nMatchLen > 0) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
}
*ppInBlock = pInBlock;
*ppCurOutData = pCurOutData;
return 0;
}
/**
* Decompress one data block
*
* @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 16;
unsigned char *pCurOutData = pOutData + nOutDataOffset;
const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
const unsigned char *pOutDataFastEnd = pOutDataEnd - 16;
/* Fast loop */
while (pInBlock < pInBlockFastEnd && pCurOutData < pOutDataFastEnd) {
const unsigned char token = *pInBlock++;
int nLiterals = (int)((unsigned int)((token & 0x70) >> 4));
if (nLiterals < LITERALS_RUN_LEN) {
memcpy(pCurOutData, pInBlock, 8);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
if (lzsa_expand_literals_slow(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd))
return -1;
}
if (pInBlock < pInBlockEnd) { /* The last token in the block does not include match information */
int nMatchOffset;
nMatchOffset = ((unsigned int)*pInBlock++);
if (token & 0x80) {
if (pInBlock >= pInBlockEnd) return -1;
nMatchOffset |= (((unsigned int)*pInBlock++) << 8);
}
nMatchOffset++;
const unsigned char *pSrc = pCurOutData - nMatchOffset;
if (pSrc < pOutData)
return -1;
int nMatchLen = (int)((unsigned int)(token & 0x0f));
if (nMatchLen < (16 - MIN_MATCH_SIZE + 1) && (pSrc + MIN_MATCH_SIZE + nMatchLen) < pCurOutData && pCurOutData < pOutDataFastEnd) {
memcpy(pCurOutData, pSrc, 16);
pCurOutData += (MIN_MATCH_SIZE + nMatchLen);
}
else {
if (lzsa_expand_match_slow(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
return -1;
}
}
}
/* Slow loop for the remainder of the buffer */
while (pInBlock < pInBlockEnd) {
const unsigned char token = *pInBlock++;
int nLiterals = (int)((unsigned int)((token & 0x70) >> 4));
if (lzsa_expand_literals_slow(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd))
return -1;
if (pInBlock < pInBlockEnd) { /* The last token in the block does not include match information */
int nMatchOffset;
nMatchOffset = ((unsigned int)*pInBlock++);
if (token & 0x80) {
if (pInBlock >= pInBlockEnd) return -1;
nMatchOffset |= (((unsigned int)*pInBlock++) << 8);
}
nMatchOffset++;
const unsigned char *pSrc = pCurOutData - nMatchOffset;
if (pSrc < pOutData)
return -1;
int nMatchLen = (int)((unsigned int)(token & 0x0f));
if (lzsa_expand_match_slow(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
return -1;
}
}
return (int)(pCurOutData - (pOutData + nOutDataOffset));
}

224  src/expand_block_v1.c  Normal file
@ -0,0 +1,224 @@
/*
* expand_block_v1.c - LZSA1 block decompressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "format.h"
#include "expand_block_v1.h"
#ifdef _MSC_VER
#define FORCE_INLINE __forceinline
#else /* _MSC_VER */
#define FORCE_INLINE __attribute__((always_inline))
#endif /* _MSC_VER */
static inline FORCE_INLINE int lzsa_build_literals_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nLiterals) {
unsigned int nByte;
const unsigned char *pInBlock = *ppInBlock;
if (pInBlock < pInBlockEnd) {
nByte = *pInBlock++;
(*nLiterals) += nByte;
if (nByte == 250) {
if (pInBlock < pInBlockEnd) {
(*nLiterals) = 256 + ((unsigned int)*pInBlock++);
}
else {
return -1;
}
}
else if (nByte == 249) {
if ((pInBlock + 1) < pInBlockEnd) {
(*nLiterals) = ((unsigned int)*pInBlock++);
(*nLiterals) |= (((unsigned int)*pInBlock++) << 8);
}
else {
return -1;
}
}
*ppInBlock = pInBlock;
return 0;
}
else {
return -1;
}
}
static inline FORCE_INLINE int lzsa_build_match_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nMatchLen) {
unsigned int nByte;
const unsigned char *pInBlock = *ppInBlock;
if (pInBlock < pInBlockEnd) {
nByte = *pInBlock++;
(*nMatchLen) += nByte;
if (nByte == 239) {
if (pInBlock < pInBlockEnd) {
(*nMatchLen) = 256 + ((unsigned int)*pInBlock++);
}
else {
return -1;
}
}
else if (nByte == 238) {
if ((pInBlock + 1) < pInBlockEnd) {
(*nMatchLen) = ((unsigned int)*pInBlock++);
(*nMatchLen) |= (((unsigned int)*pInBlock++) << 8);
}
else {
return -1;
}
}
*ppInBlock = pInBlock;
return 0;
}
else {
return -1;
}
}
/**
* Decompress one LZSA1 data block
*
* @param pInBlock pointer to compressed data
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
unsigned char *pCurOutData = pOutData + nOutDataOffset;
const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
const unsigned char *pOutDataFastEnd = pOutDataEnd - 18;
while (pInBlock < pInBlockEnd) {
const unsigned char token = *pInBlock++;
unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4);
if (nLiterals != LITERALS_RUN_LEN_V1 && (pInBlock + 8) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) {
memcpy(pCurOutData, pInBlock, 8);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
if (nLiterals == LITERALS_RUN_LEN_V1) {
if (lzsa_build_literals_len_v1(&pInBlock, pInBlockEnd, &nLiterals))
return -1;
}
if (nLiterals != 0) {
if ((pInBlock + nLiterals) <= pInBlockEnd &&
(pCurOutData + nLiterals) <= pOutDataEnd) {
memcpy(pCurOutData, pInBlock, nLiterals);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
return -1;
}
}
}
if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
unsigned int nMatchOffset;
nMatchOffset = ((unsigned int)(*pInBlock++)) ^ 0xff;
if (token & 0x80) {
nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8) ^ 0xff00;
}
nMatchOffset++;
const unsigned char *pSrc = pCurOutData - nMatchOffset;
if (pSrc >= pOutData) {
unsigned int nMatchLen = (unsigned int)(token & 0x0f);
if (nMatchLen != MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd && (pSrc + 18) <= pOutDataEnd) {
memcpy(pCurOutData, pSrc, 8);
memcpy(pCurOutData + 8, pSrc + 8, 8);
memcpy(pCurOutData + 16, pSrc + 16, 2);
pCurOutData += (MIN_MATCH_SIZE_V1 + nMatchLen);
}
else {
nMatchLen += MIN_MATCH_SIZE_V1;
if (nMatchLen == (MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) {
if (lzsa_build_match_len_v1(&pInBlock, pInBlockEnd, &nMatchLen))
return -1;
if (nMatchLen == 0)
break;
}
if ((pSrc + nMatchLen) <= pOutDataEnd) {
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
do {
memcpy(pCopyDst, pCopySrc, 16);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
pCurOutData += nMatchLen;
}
else {
while (nMatchLen) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
}
}
else {
return -1;
}
}
else {
return -1;
}
}
}
else {
return -1;
}
}
}
return (int)(pCurOutData - (pOutData + nOutDataOffset));
}
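To make the extension scheme decoded by lzsa_build_literals_len_v1() above a little more concrete, here is an illustrative inverse (a sketch, not the project's encoder): it writes the extra bytes that the decoder adds back to the 7 already carried by the token. The match-length variant works the same way, with a bias of 18 (MIN_MATCH_SIZE_V1 + MATCH_RUN_LEN_V1) and markers 239/238 instead of 250/249.

/* Illustrative sketch (not the project's encoder): emit the LZSA1 literals  */
/* run extension so that lzsa_build_literals_len_v1() reads back nCount.     */
/* Assumes the token's LLL field already holds min(nCount, 7), nCount <= 65535. */
static unsigned char *demo_emit_literals_len_v1(unsigned char *pOut, unsigned int nCount) {
   if (nCount < 7)
      return pOut;                              /* fits entirely in the token  */
   if (nCount <= 7 + 248) {
      *pOut++ = (unsigned char)(nCount - 7);    /* one extension byte, 0..248  */
   }
   else if (nCount >= 256 && nCount <= 511) {
      *pOut++ = 250;                            /* 250: next byte is count-256 */
      *pOut++ = (unsigned char)(nCount - 256);
   }
   else {
      *pOut++ = 249;                            /* 249: 16-bit little-endian   */
      *pOut++ = (unsigned char)(nCount & 0xff);
      *pOut++ = (unsigned char)(nCount >> 8);
   }
   return pOut;
}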

24  src/expand.h → src/expand_block_v1.h  Executable file → Normal file
@ -1,5 +1,5 @@
/*
* expand.h - block decompressor definitions
* expand_block_v1.h - LZSA1 block decompressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -20,20 +20,30 @@
* 3. This notice may not be removed or altered from any source distribution.
*/
#ifndef _EXPAND_H
#define _EXPAND_H
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_BLOCK_V1_H
#define _EXPAND_BLOCK_V1_H
/**
* Decompress one data block
* Decompress one LZSA1 data block
*
* @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _EXPAND_H */
#endif /* _EXPAND_BLOCK_V1_H */

253  src/expand_block_v2.c  Normal file
@ -0,0 +1,253 @@
/*
* expand_block_v2.c - LZSA2 block decompressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "format.h"
#include "expand_block_v2.h"
#ifdef _MSC_VER
#define FORCE_INLINE __forceinline
#else /* _MSC_VER */
#define FORCE_INLINE __attribute__((always_inline))
#endif /* _MSC_VER */
static inline FORCE_INLINE int lzsa_get_nibble_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nValue) {
if ((*nCurNibbles ^= 1) != 0) {
const unsigned char *pInBlock = *ppInBlock;
if (pInBlock < pInBlockEnd) {
(*nibbles) = *pInBlock++;
*ppInBlock = pInBlock;
(*nValue) = ((unsigned int)((*nibbles) & 0xf0)) >> 4;
return 0;
}
else {
return -1;
}
}
(*nValue) = (unsigned int)((*nibbles) & 0x0f);
return 0;
}
static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nLength) {
unsigned int nValue;
if (!lzsa_get_nibble_v2(ppInBlock, pInBlockEnd, nCurNibbles, nibbles, &nValue)) {
(*nLength) += nValue;
if (nValue == 15) {
const unsigned char *pInBlock = *ppInBlock;
if (pInBlock < pInBlockEnd) {
(*nLength) += ((unsigned int)*pInBlock++);
if ((*nLength) == 257) {
if ((pInBlock + 1) < pInBlockEnd) {
(*nLength) = ((unsigned int)*pInBlock++);
(*nLength) |= (((unsigned int)*pInBlock++) << 8);
}
else {
return -1;
}
}
else if ((*nLength) == 256) {
(*nLength) = 0;
}
}
else {
return -1;
}
*ppInBlock = pInBlock;
}
return 0;
}
else {
return -1;
}
}
/**
* Decompress one LZSA2 data block
*
* @param pInBlock pointer to compressed data
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
unsigned char *pCurOutData = pOutData + nOutDataOffset;
const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
const unsigned char *pOutDataFastEnd = pOutDataEnd - 20;
int nCurNibbles = 0;
unsigned char nibbles;
int nMatchOffset = 0;
while (pInBlock < pInBlockEnd) {
const unsigned char token = *pInBlock++;
unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3);
if (nLiterals != LITERALS_RUN_LEN_V2 && (pInBlock + 4) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) {
memcpy(pCurOutData, pInBlock, 4);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
if (nLiterals == LITERALS_RUN_LEN_V2) {
if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nLiterals))
return -1;
}
if (nLiterals != 0) {
if ((pInBlock + nLiterals) <= pInBlockEnd &&
(pCurOutData + nLiterals) <= pOutDataEnd) {
memcpy(pCurOutData, pInBlock, nLiterals);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
return -1;
}
}
}
if (pInBlock < pInBlockEnd) { /* The last token in the block does not include match information */
unsigned char nOffsetMode = token & 0xc0;
unsigned int nValue;
switch (nOffsetMode) {
case 0x00:
/* 5 bit offset */
if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
return -1;
nMatchOffset = nValue << 1;
nMatchOffset |= ((token & 0x20) >> 5);
nMatchOffset ^= 0x1e;
nMatchOffset++;
break;
case 0x40:
/* 9 bit offset */
nMatchOffset = (unsigned int)(*pInBlock++);
nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
nMatchOffset ^= 0x0ff;
nMatchOffset++;
break;
case 0x80:
/* 13 bit offset */
if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
return -1;
nMatchOffset = (unsigned int)(*pInBlock++);
nMatchOffset |= (nValue << 9);
nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
nMatchOffset ^= 0x1eff;
nMatchOffset += (512 + 1);
break;
default:
/* Check if this is a 16 bit offset or a rep-match */
if ((token & 0x20) == 0) {
/* 16 bit offset */
nMatchOffset = (((unsigned int)(*pInBlock++)) << 8);
if (pInBlock >= pInBlockEnd) return -1;
nMatchOffset |= (unsigned int)(*pInBlock++);
nMatchOffset ^= 0xffff;
nMatchOffset++;
}
break;
}
const unsigned char *pSrc = pCurOutData - nMatchOffset;
if (pSrc >= pOutData) {
unsigned int nMatchLen = (unsigned int)(token & 0x07);
if (nMatchLen != MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd && (pSrc + 10) <= pOutDataEnd) {
memcpy(pCurOutData, pSrc, 8);
memcpy(pCurOutData + 8, pSrc + 8, 2);
pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen);
}
else {
nMatchLen += MIN_MATCH_SIZE_V2;
if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) {
if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nMatchLen))
return -1;
if (nMatchLen == 0)
break;
}
if ((pSrc + nMatchLen) <= pOutDataEnd) {
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
do {
memcpy(pCopyDst, pCopySrc, 16);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
pCurOutData += nMatchLen;
}
else {
while (nMatchLen) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
}
}
else {
return -1;
}
}
else {
return -1;
}
}
}
else {
return -1;
}
}
}
return (int)(pCurOutData - (pOutData + nOutDataOffset));
}
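As a compact summary of the offset encodings that the switch above distinguishes (X, Y, Z being the top three token bits, with the stored bits kept inverted, hence the XOR masks), the following enum is purely illustrative and not part of the source:

/* Illustrative summary of the LZSA2 offset modes decoded above (token & 0xc0). */
enum demo_lzsa2_offset_mode {
   DEMO_LZSA2_OFFSET_5BIT  = 0x00,  /* 00Z: Z is offset bit 0, a nibble gives bits 1-4                */
   DEMO_LZSA2_OFFSET_9BIT  = 0x40,  /* 01Z: Z is offset bit 8, a byte gives bits 0-7                  */
   DEMO_LZSA2_OFFSET_13BIT = 0x80,  /* 10Z: Z is bit 8, a nibble gives bits 9-12, a byte gives
                                       bits 0-7, and 512 is added to the result                       */
   DEMO_LZSA2_OFFSET_16REP = 0xc0   /* 110: two offset bytes follow; 111: reuse the previous offset   */
};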

49  src/expand_block_v2.h  Normal file
@ -0,0 +1,49 @@
/*
* expand_block_v2.h - LZSA2 block decompressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_BLOCK_V2_H
#define _EXPAND_BLOCK_V2_H
/**
* Decompress one LZSA2 data block
*
* @param pInBlock pointer to compressed data
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _EXPAND_BLOCK_V2_H */

76  src/expand_context.c  Normal file
@ -0,0 +1,76 @@
/*
* expand_context.c - decompressor context implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "expand_context.h"
#include "expand_block_v1.h"
#include "expand_block_v2.h"
#include "lib.h"
/**
* Decompress one data block
*
* @param pInBlock pointer to compressed data
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
* @param nFormatVersion version of format to use (1-2)
* @param nFlags compression flags (LZSA_FLAG_xxx)
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block(unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion, const int nFlags) {
int nDecompressedSize;
if (nFlags & LZSA_FLAG_RAW_BACKWARD) {
lzsa_reverse_buffer(pInBlock, nBlockSize);
}
if (nFormatVersion == 1)
nDecompressedSize = lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
else if (nFormatVersion == 2)
nDecompressedSize = lzsa_decompressor_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
else
nDecompressedSize = -1;
if (nDecompressedSize != -1 && (nFlags & LZSA_FLAG_RAW_BACKWARD)) {
lzsa_reverse_buffer(pOutData + nOutDataOffset, nDecompressedSize);
}
if (nFlags & LZSA_FLAG_RAW_BACKWARD) {
lzsa_reverse_buffer(pInBlock, nBlockSize);
}
return nDecompressedSize;
}
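As a usage illustration (the wrapper below is hypothetical and not part of the library), a caller holding a complete raw LZSA2 block in memory could drive this dispatcher directly:
#include "expand_context.h"
/* Sketch: decompress one raw LZSA2 block with no prior history.
 * Returns the decompressed size, or -1 on error, exactly like the
 * dispatcher it wraps. */
static int decompress_one_raw_block(unsigned char *pCompressed, int nCompressedSize,
                                    unsigned char *pOut, int nOutCapacity) {
    return lzsa_decompressor_expand_block(pCompressed, nCompressedSize,
                                          pOut, 0 /* no previously decompressed bytes */,
                                          nOutCapacity, 2 /* LZSA2 */, 0 /* no flags */);
}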

61
src/expand_context.h Normal file
View File

@ -0,0 +1,61 @@
/*
* expand_context.h - decompressor context definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_CONTEXT_H
#define _EXPAND_CONTEXT_H
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Decompress one data block
*
* @param pInBlock pointer to compressed data
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
* @param nFormatVersion version of format to use (1-2)
* @param nFlags compression flags (LZSA_FLAG_xxx)
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block(unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion, const int nFlags);
#ifdef __cplusplus
}
#endif
#endif /* _EXPAND_CONTEXT_H */

163
src/expand_inmem.c Normal file
View File

@ -0,0 +1,163 @@
/*
* expand_inmem.c - in-memory decompression implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "expand_inmem.h"
#include "lib.h"
#include "frame.h"
#define BLOCK_SIZE 65536
/**
* Get maximum decompressed size of compressed data
*
* @param pFileData compressed data
* @param nFileSize compressed size in bytes
*
* @return maximum decompressed size, or -1 for error
*/
size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size_t nFileSize) {
const unsigned char *pCurFileData = pFileData;
const unsigned char *pEndFileData = pCurFileData + nFileSize;
int nFormatVersion = 0;
size_t nMaxDecompressedSize = 0;
const int nHeaderSize = lzsa_get_header_size();
/* Check header */
if ((pCurFileData + nHeaderSize) > pEndFileData ||
lzsa_decode_header(pCurFileData, nHeaderSize, &nFormatVersion) != 0)
return -1;
pCurFileData += nHeaderSize;
while (pCurFileData < pEndFileData) {
unsigned int nBlockDataSize = 0;
int nIsUncompressed = 0;
const int nFrameSize = lzsa_get_frame_size();
/* Decode frame header */
if ((pCurFileData + nFrameSize) > pEndFileData ||
lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0)
return -1;
pCurFileData += nFrameSize;
if (!nBlockDataSize)
break;
/* Add one potentially full block to the decompressed size */
nMaxDecompressedSize += BLOCK_SIZE;
if ((pCurFileData + nBlockDataSize) > pEndFileData)
return -1;
pCurFileData += nBlockDataSize;
}
return nMaxDecompressedSize;
}
/**
* Decompress data in memory
*
* @param pFileData compressed data
* @param pOutBuffer buffer for decompressed data
* @param nFileSize compressed size in bytes
* @param nMaxOutBufferSize maximum capacity of decompression buffer
* @param nFlags compression flags (LZSA_FLAG_xxx)
* @param pFormatVersion pointer to format version, updated if this function is successful
*
* @return actual decompressed size, or -1 for error
*/
size_t lzsa_decompress_inmem(unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, const unsigned int nFlags, int *pFormatVersion) {
unsigned char *pCurFileData = pFileData;
const unsigned char *pEndFileData = pCurFileData + nFileSize;
unsigned char *pCurOutBuffer = pOutBuffer;
const unsigned char *pEndOutBuffer = pCurOutBuffer + nMaxOutBufferSize;
int nPreviousBlockSize;
const int nHeaderSize = lzsa_get_header_size();
if (nFlags & LZSA_FLAG_RAW_BLOCK) {
return (size_t)lzsa_decompressor_expand_block(pFileData, (int)nFileSize, pOutBuffer, 0, (int)nMaxOutBufferSize, *pFormatVersion, nFlags);
}
/* Check header */
if ((pCurFileData + nHeaderSize) > pEndFileData ||
lzsa_decode_header(pCurFileData, nHeaderSize, pFormatVersion) != 0)
return -1;
pCurFileData += nHeaderSize;
nPreviousBlockSize = 0;
while (pCurFileData < pEndFileData) {
unsigned int nBlockDataSize = 0;
int nIsUncompressed = 0;
const int nFrameSize = lzsa_get_frame_size();
/* Decode frame header */
if ((pCurFileData + nFrameSize) > pEndFileData ||
lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0)
return -1;
pCurFileData += nFrameSize;
if (!nBlockDataSize)
break;
if (!nIsUncompressed) {
int nDecompressedSize;
/* Decompress block */
if ((pCurFileData + nBlockDataSize) > pEndFileData)
return -1;
nDecompressedSize = lzsa_decompressor_expand_block(pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer + nPreviousBlockSize), *pFormatVersion, nFlags);
if (nDecompressedSize < 0)
return -1;
pCurOutBuffer += nDecompressedSize;
nPreviousBlockSize = nDecompressedSize;
}
else {
/* Copy uncompressed block */
if ((pCurFileData + nBlockDataSize) > pEndFileData)
return -1;
if ((pCurOutBuffer + nBlockDataSize) > pEndOutBuffer)
return -1;
memcpy(pCurOutBuffer, pCurFileData, nBlockDataSize);
pCurOutBuffer += nBlockDataSize;
}
pCurFileData += nBlockDataSize;
}
return (int)(pCurOutBuffer - pOutBuffer);
}
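Putting the two in-memory entry points together, a minimal sketch of decompressing a whole container that is already loaded in memory (the wrapper function is hypothetical; note that both library calls signal errors by returning -1 through a size_t):
#include <stdlib.h>
#include "expand_inmem.h"
static unsigned char *decompress_whole_container(unsigned char *pFileData, size_t nFileSize,
                                                 size_t *pDecompressedSize) {
    int nFormatVersion = 0;
    size_t nMaxSize = lzsa_get_max_decompressed_size_inmem(pFileData, nFileSize);
    if (nMaxSize == (size_t)-1) return NULL;   /* bad header or truncated frame */
    unsigned char *pOut = (unsigned char *)malloc(nMaxSize);
    if (!pOut) return NULL;
    size_t nActualSize = lzsa_decompress_inmem(pFileData, pOut, nFileSize, nMaxSize,
                                               0 /* flags */, &nFormatVersion);
    if (nActualSize == (size_t)-1) { free(pOut); return NULL; }
    *pDecompressedSize = nActualSize;
    return pOut;
}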

70
src/expand_inmem.h Normal file
View File

@ -0,0 +1,70 @@
/*
* expand_inmem.h - in-memory decompression definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_INMEM_H
#define _EXPAND_INMEM_H
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Get maximum decompressed size of compressed data
*
* @param pFileData compressed data
* @param nFileSize compressed size in bytes
*
* @return maximum decompressed size, or -1 for error
*/
size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size_t nFileSize);
/**
* Decompress data in memory
*
* @param pFileData compressed data
* @param pOutBuffer buffer for decompressed data
* @param nFileSize compressed size in bytes
* @param nMaxOutBufferSize maximum capacity of decompression buffer
* @param nFlags compression flags (LZSA_FLAG_xxx)
* @param pFormatVersion pointer to format version, updated if this function is successful
*
* @return actual decompressed size, or -1 for error
*/
size_t lzsa_decompress_inmem(unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, const unsigned int nFlags, int *pFormatVersion);
#ifdef __cplusplus
}
#endif
#endif /* _EXPAND_INMEM_H */

236
src/expand_streaming.c Normal file
View File

@ -0,0 +1,236 @@
/*
* expand_streaming.c - streaming decompression implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "expand_streaming.h"
#include "format.h"
#include "frame.h"
#include "lib.h"
/*-------------- File API -------------- */
/**
* Decompress file
*
* @param pszInFilename name of input(compressed) file to decompress
* @param pszOutFilename name of output(decompressed) file to generate
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
long long *pOriginalSize, long long *pCompressedSize) {
lzsa_stream_t inStream, outStream;
void *pDictionaryData = NULL;
int nDictionaryDataSize = 0;
lzsa_status_t nStatus;
if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0) {
return LZSA_ERROR_SRC;
}
if (lzsa_filestream_open(&outStream, pszOutFilename, "wb") < 0) {
inStream.close(&inStream);
return LZSA_ERROR_DST;
}
nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize);
if (nStatus) {
outStream.close(&outStream);
inStream.close(&inStream);
return nStatus;
}
nStatus = lzsa_decompress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nFormatVersion, pOriginalSize, pCompressedSize);
lzsa_dictionary_free(&pDictionaryData);
outStream.close(&outStream);
inStream.close(&inStream);
return nStatus;
}
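A minimal sketch of calling the file API from a tool (the file names are placeholders; for a regular container the format version argument is only a default, since the real version is read from the stream header):
#include <stdio.h>
#include "lib.h"
static int example_decompress_file(void) {
    long long nOriginalSize = 0LL, nCompressedSize = 0LL;
    lzsa_status_t nStatus = lzsa_decompress_file("data.lzsa", "data.bin", NULL /* no dictionary */,
                                                 0 /* flags */, 1 /* default format version */,
                                                 &nOriginalSize, &nCompressedSize);
    if (nStatus != LZSA_OK) {
        fprintf(stderr, "decompression failed (status %d)\n", (int)nStatus);
        return 100;
    }
    fprintf(stdout, "%lld compressed => %lld decompressed bytes\n", nCompressedSize, nOriginalSize);
    return 0;
}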
/*-------------- Streaming API -------------- */
/**
* Decompress stream
*
* @param pInStream input(compressed) stream to decompress
* @param pOutStream output(decompressed) stream to write to
* @param pDictionaryData dictionary contents, or NULL for none
* @param nDictionaryDataSize size of dictionary contents, or 0
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
long long *pOriginalSize, long long *pCompressedSize) {
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
unsigned char cFrameData[16];
unsigned char *pInBlock;
unsigned char *pOutData;
if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
const int nHeaderSize = lzsa_get_header_size();
memset(cFrameData, 0, 16);
if (pInStream->read(pInStream, cFrameData, nHeaderSize) != nHeaderSize) {
return LZSA_ERROR_SRC;
}
if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) {
return LZSA_ERROR_FORMAT;
}
nCompressedSize += (long long)nHeaderSize;
}
pInBlock = (unsigned char*)malloc(BLOCK_SIZE);
if (!pInBlock) {
return LZSA_ERROR_MEMORY;
}
pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2);
if (!pOutData) {
free(pInBlock);
pInBlock = NULL;
return LZSA_ERROR_MEMORY;
}
int nDecompressionError = 0;
int nPrevDecompressedSize = 0;
int nNumBlocks = 0;
while (!pInStream->eof(pInStream) && !nDecompressionError) {
unsigned int nBlockSize = 0;
int nIsUncompressed = 0;
if (nPrevDecompressedSize != 0) {
memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize);
}
else if (nDictionaryDataSize && pDictionaryData) {
nPrevDecompressedSize = nDictionaryDataSize;
memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pDictionaryData, nPrevDecompressedSize);
}
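/* At this point the lower half of the 2 * BLOCK_SIZE output window holds the
 * decompression history: the tail of the previous block or, on the first
 * iteration, the optional dictionary. Each block is decompressed into the
 * upper half (pOutData + BLOCK_SIZE), so back references can reach into up
 * to BLOCK_SIZE bytes of earlier data. */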
if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
const int nFrameSize = lzsa_get_frame_size();
memset(cFrameData, 0, 16);
if (pInStream->read(pInStream, cFrameData, nFrameSize) == nFrameSize) {
if (lzsa_decode_frame(cFrameData, nFrameSize, &nBlockSize, &nIsUncompressed) < 0) {
nDecompressionError = LZSA_ERROR_FORMAT;
nBlockSize = 0;
}
nCompressedSize += (long long)nFrameSize;
}
else {
nDecompressionError = LZSA_ERROR_SRC;
nBlockSize = 0;
}
}
else {
if (!nNumBlocks)
nBlockSize = BLOCK_SIZE;
else
nBlockSize = 0;
}
if (nBlockSize != 0) {
int nDecompressedSize = 0;
if ((int)nBlockSize > BLOCK_SIZE) {
nDecompressionError = LZSA_ERROR_FORMAT;
break;
}
size_t nReadBytes = pInStream->read(pInStream, pInBlock, nBlockSize);
if (nFlags & LZSA_FLAG_RAW_BLOCK) {
nBlockSize = (unsigned int)nReadBytes;
}
if (nReadBytes == nBlockSize) {
nCompressedSize += (long long)nReadBytes;
if (nIsUncompressed) {
memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize);
nDecompressedSize = nBlockSize;
}
else {
nDecompressedSize = lzsa_decompressor_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE, nFormatVersion, nFlags);
if (nDecompressedSize < 0) {
nDecompressionError = LZSA_ERROR_DECOMPRESSION;
break;
}
}
if (nDecompressedSize != 0) {
nOriginalSize += (long long)nDecompressedSize;
if (pOutStream->write(pOutStream, pOutData + BLOCK_SIZE, nDecompressedSize) != nDecompressedSize)
nDecompressionError = LZSA_ERROR_DST;
nPrevDecompressedSize = nDecompressedSize;
nDecompressedSize = 0;
}
}
else {
break;
}
nNumBlocks++;
}
else {
break;
}
}
free(pOutData);
pOutData = NULL;
free(pInBlock);
pInBlock = NULL;
*pOriginalSize = nOriginalSize;
*pCompressedSize = nCompressedSize;
return nDecompressionError;
}

86
src/expand_streaming.h Normal file
View File

@ -0,0 +1,86 @@
/*
* expand_streaming.h - streaming decompression definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_STREAMING_H
#define _EXPAND_STREAMING_H
#include "stream.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declaration */
typedef enum _lzsa_status_t lzsa_status_t;
/*-------------- File API -------------- */
/**
* Decompress file
*
* @param pszInFilename name of input(compressed) file to decompress
* @param pszOutFilename name of output(decompressed) file to generate
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
long long *pOriginalSize, long long *pCompressedSize);
/*-------------- Streaming API -------------- */
/**
* Decompress stream
*
* @param pInStream input(compressed) stream to decompress
* @param pOutStream output(decompressed) stream to write to
* @param pDictionaryData dictionary contents, or NULL for none
* @param nDictionaryDataSize size of dictionary contents, or 0
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
long long *pOriginalSize, long long *pCompressedSize);
#ifdef __cplusplus
}
#endif
#endif /* _EXPAND_STREAMING_H */

src/format.h
View File

@ -20,13 +20,32 @@
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _FORMAT_H
#define _FORMAT_H
#define MIN_MATCH_SIZE 3
#define MIN_OFFSET 1
#define MAX_OFFSET 0xffff
#define LITERALS_RUN_LEN 7
#define MATCH_RUN_LEN 15
#define MAX_VARLEN 0xffff
#define BLOCK_SIZE 65536
#define MIN_MATCH_SIZE_V1 3
#define LITERALS_RUN_LEN_V1 7
#define MATCH_RUN_LEN_V1 15
#define MIN_MATCH_SIZE_V2 2
#define LITERALS_RUN_LEN_V2 3
#define MATCH_RUN_LEN_V2 7
#endif /* _FORMAT_H */
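The V1/V2 pairs above describe how much of a literal run and match length fits directly in a token byte before extra length bytes are needed. As an illustration only (the layout is taken from the LZSA1 block format documentation, and the real encoders live in the shrink_* sources), packing the two V1 length fields might look like this:
/* Hypothetical helper: an LZSA1 token stores the literal run length in
 * bits 6..4 and the encoded match length (match length - MIN_MATCH_SIZE_V1)
 * in bits 3..0; bit 7 carries the match offset size flag. A field equal to
 * LITERALS_RUN_LEN_V1 or MATCH_RUN_LEN_V1 means extra length bytes follow. */
static unsigned char lzsa1_token_length_fields(int nNumLiterals, int nMatchLen) {
    const int nLitField = (nNumLiterals < LITERALS_RUN_LEN_V1) ? nNumLiterals : LITERALS_RUN_LEN_V1;
    const int nEncodedMatch = nMatchLen - MIN_MATCH_SIZE_V1;
    const int nMatchField = (nEncodedMatch < MATCH_RUN_LEN_V1) ? nEncodedMatch : MATCH_RUN_LEN_V1;
    return (unsigned char)((nLitField << 4) | nMatchField);
}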

189
src/frame.c Normal file
View File

@ -0,0 +1,189 @@
/*
* frame.c - frame implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "frame.h"
#define LZSA_ID_0 0x7b
#define LZSA_ID_1 0x9e
/**
* Get compressed file header size
*
* @return file header size
*/
int lzsa_get_header_size(void) {
return 3;
}
/**
* Get compressed frame header size
*
* @return frame header size
*/
int lzsa_get_frame_size(void) {
return 3;
}
/**
* Encode file header
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes
* @param nFormatVersion version of format to encode (1-2)
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion) {
if (nMaxFrameDataSize >= 3 && (nFormatVersion == 1 || nFormatVersion == 2)) {
pFrameData[0] = LZSA_ID_0; /* Magic number */
pFrameData[1] = LZSA_ID_1;
pFrameData[2] = (nFormatVersion == 2) ? 0x20 : 0; /* Format version: 0x00 = version 1, 0x20 = version 2 */
return 3;
}
else {
return -1;
}
}
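/* For reference, the header bytes emitted above are 7B 9E 00 for an LZSA1
 * stream and 7B 9E 20 for an LZSA2 stream: bit 5 of the third byte selects
 * format version 2, and lzsa_decode_header() below checks the same bit. */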
/**
* Encode compressed block frame header
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes
* @param nBlockDataSize compressed block's data size, in bytes
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_compressed_block_frame(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nBlockDataSize) {
if (nMaxFrameDataSize >= 3 && nBlockDataSize <= 0x7fffff) {
pFrameData[0] = nBlockDataSize & 0xff;
pFrameData[1] = (nBlockDataSize >> 8) & 0xff;
pFrameData[2] = (nBlockDataSize >> 16) & 0x7f;
return 3;
}
else {
return -1;
}
}
/**
* Encode uncompressed block frame header
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes
* @param nBlockDataSize uncompressed block's data size, in bytes
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_uncompressed_block_frame(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nBlockDataSize) {
if (nMaxFrameDataSize >= 3 && nBlockDataSize <= 0x7fffff) {
pFrameData[0] = nBlockDataSize & 0xff;
pFrameData[1] = (nBlockDataSize >> 8) & 0xff;
pFrameData[2] = ((nBlockDataSize >> 16) & 0x7f) | 0x80; /* Uncompressed block */
return 3;
}
else {
return -1;
}
}
/**
* Encode terminal frame header
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataSize) {
if (nMaxFrameDataSize >= 3) {
pFrameData[0] = 0x00; /* EOD frame */
pFrameData[1] = 0x00;
pFrameData[2] = 0x00;
return 3;
}
else {
return -1;
}
}
/**
* Decode file header
*
* @param pFrameData data bytes
* @param nFrameDataSize number of bytes to decode
* @param nFormatVersion pointer to format version, set to 1 or 2 on success
*
* @return 0 for success, or -1 for failure
*/
int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize, int *nFormatVersion) {
if (nFrameDataSize != 3 ||
pFrameData[0] != LZSA_ID_0 ||
pFrameData[1] != LZSA_ID_1 ||
(pFrameData[2] & 0x1f) != 0 ||
((pFrameData[2] & 0xe0) != 0x00 && (pFrameData[2] & 0xe0) != 0x20)) {
return -1;
}
else {
*nFormatVersion = (pFrameData[2] & 0xe0) ? 2 : 1;
return 0;
}
}
/**
* Decode frame header
*
* @param pFrameData data bytes
* @param nFrameDataSize number of bytes to decode
* @param nBlockSize pointer to block size, updated if this function succeeds (set to 0 if this is the terminal frame)
* @param nIsUncompressed pointer to uncompressed block flag, updated if this function succeeds (1 for an uncompressed block, 0 for a compressed one)
*
* @return 0 for success, or -1 for failure
*/
int lzsa_decode_frame(const unsigned char *pFrameData, const int nFrameDataSize, unsigned int *nBlockSize, int *nIsUncompressed) {
if (nFrameDataSize == 3) {
*nBlockSize = ((unsigned int)pFrameData[0]) |
(((unsigned int)pFrameData[1]) << 8) |
(((unsigned int)pFrameData[2]) << 16);
*nIsUncompressed = ((*nBlockSize & 0x800000) != 0) ? 1 : 0;
*nBlockSize &= 0x7fffff;
return 0;
}
else {
return -1;
}
}
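A few worked examples of the 3-byte frame header as decoded above (little-endian 24-bit size, top bit = uncompressed flag):
/* 34 12 00 -> compressed block, nBlockSize = 0x001234 (4,660 bytes)
 * 34 12 80 -> uncompressed (stored) block of 0x001234 bytes
 * 00 00 00 -> terminal frame: nBlockSize is set to 0 and decompression stops */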

122
src/frame.h Normal file
View File

@ -0,0 +1,122 @@
/*
* frame.h - frame definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _FRAME_H
#define _FRAME_H
#ifdef __cplusplus
extern "C" {
#endif
/**
* Get compressed file header size
*
* @return file header size
*/
int lzsa_get_header_size(void);
/**
* Get compressed frame header size
*
* @return frame header size
*/
int lzsa_get_frame_size(void);
/**
* Encode file header
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes
* @param nFormatVersion version of format to encode (1-2)
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion);
/**
* Encode compressed block frame header
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes
* @param nBlockDataSize compressed block's data size, in bytes
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_compressed_block_frame(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nBlockDataSize);
/**
* Encode uncompressed block frame header
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes
* @param nBlockDataSize uncompressed block's data size, in bytes
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_uncompressed_block_frame(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nBlockDataSize);
/**
* Encode terminal frame header
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataSize);
/**
* Decode file header
*
* @param pFrameData data bytes
* @param nFrameDataSize number of bytes to decode
* @param nFormatVersion pointer to format version, set to 1 or 2 on success
*
* @return 0 for success, or -1 for failure
*/
int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize, int *nFormatVersion);
/**
* Decode frame header
*
* @param pFrameData data bytes
* @param nFrameDataSize number of bytes to decode
* @param nBlockSize pointer to block size, updated if this function succeeds (set to 0 if this is the terminal frame)
* @param nIsUncompressed pointer to uncompressed block flag, updated if this function succeeds (1 for an uncompressed block, 0 for a compressed one)
*
* @return 0 for success, or -1 for failure
*/
int lzsa_decode_frame(const unsigned char *pFrameData, const int nFrameDataSize, unsigned int *nBlockSize, int *nIsUncompressed);
#ifdef __cplusplus
}
#endif
#endif /* _FRAME_H */

95
src/lib.h Executable file
View File

@ -0,0 +1,95 @@
/*
* lib.h - LZSA library definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _LIB_H
#define _LIB_H
#include "stream.h"
#include "dictionary.h"
#include "frame.h"
#include "format.h"
#include "shrink_context.h"
#include "shrink_streaming.h"
#include "shrink_inmem.h"
#include "expand_context.h"
#include "expand_streaming.h"
#include "expand_inmem.h"
#ifdef __cplusplus
extern "C" {
#endif
/** High level status for compression and decompression */
typedef enum _lzsa_status_t {
LZSA_OK = 0, /**< Success */
LZSA_ERROR_SRC, /**< Error reading input */
LZSA_ERROR_DST, /**< Error writing output */
LZSA_ERROR_DICTIONARY, /**< Error reading dictionary */
LZSA_ERROR_MEMORY, /**< Out of memory */
/* Compression-specific status codes */
LZSA_ERROR_COMPRESSION, /**< Internal compression error */
LZSA_ERROR_RAW_TOOLARGE, /**< Input is too large to be compressed to a raw block */
LZSA_ERROR_RAW_UNCOMPRESSED, /**< Input is incompressible and raw blocks don't support uncompressed data */
/* Decompression-specific status codes */
LZSA_ERROR_FORMAT, /**< Invalid input format or magic number when decompressing */
LZSA_ERROR_DECOMPRESSION /**< Internal decompression error */
} lzsa_status_t;
/* Compression flags */
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */
#define LZSA_FLAG_RAW_BACKWARD (1<<2) /**< 1 to compress or decompress raw block backward */
/**
* Reverse bytes in the specified buffer
*
* @param pBuffer pointer to buffer whose contents are to be reversed
* @param nBufferSize size of buffer in bytes
*/
static inline void lzsa_reverse_buffer(unsigned char *pBuffer, const int nBufferSize) {
int nMidPoint = nBufferSize / 2;
int i, j;
for (i = 0, j = nBufferSize - 1; i < nMidPoint; i++, j--) {
unsigned char c = pBuffer[i];
pBuffer[i] = pBuffer[j];
pBuffer[j] = c;
}
}
#ifdef __cplusplus
}
#endif
#endif /* _LIB_H */
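As an illustration of the status codes above (this helper is hypothetical and not part of the library), a client might map them to messages like so:
static const char *lzsa_status_to_string(lzsa_status_t nStatus) {
    switch (nStatus) {
    case LZSA_OK:                     return "success";
    case LZSA_ERROR_SRC:              return "error reading input";
    case LZSA_ERROR_DST:              return "error writing output";
    case LZSA_ERROR_DICTIONARY:       return "error reading dictionary";
    case LZSA_ERROR_MEMORY:           return "out of memory";
    case LZSA_ERROR_COMPRESSION:      return "internal compression error";
    case LZSA_ERROR_RAW_TOOLARGE:     return "input too large for a raw block";
    case LZSA_ERROR_RAW_UNCOMPRESSED: return "input is incompressible; raw blocks cannot store it";
    case LZSA_ERROR_FORMAT:           return "invalid format or magic number";
    case LZSA_ERROR_DECOMPRESSION:    return "internal decompression error";
    default:                          return "unknown status";
    }
}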

View File

@ -50,17 +50,38 @@ typedef int saidx_t;
#define PRIdSAIDX_T "d"
#endif
/*- divsufsort context */
typedef struct _divsufsort_ctx_t {
saidx_t *bucket_A;
saidx_t *bucket_B;
} divsufsort_ctx_t;
/*- Prototypes -*/
/**
* Initialize suffix array context
*
* @return 0 for success, or non-zero in case of an error
*/
int divsufsort_init(divsufsort_ctx_t *ctx);
/**
* Destroy suffix array context
*
* @param ctx suffix array context to destroy
*/
void divsufsort_destroy(divsufsort_ctx_t *ctx);
/**
* Constructs the suffix array of a given string.
* @param ctx suffix array context
* @param T[0..n-1] The input string.
* @param SA[0..n-1] The output array of suffixes.
* @param n The length of the given string.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/
DIVSUFSORT_API
saint_t divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
saint_t divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n);
#if 0
/**

View File

@ -31,9 +31,7 @@
extern "C" {
#endif /* __cplusplus */
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include "divsufsort_config.h"
#include <assert.h>
#include <stdio.h>
#if HAVE_STRING_H

View File

@ -327,11 +327,47 @@ construct_BWT(const sauchar_t *T, saidx_t *SA,
/*---------------------------------------------------------------------------*/
/**
* Initialize suffix array context
*
* @return 0 for success, or non-zero in case of an error
*/
int divsufsort_init(divsufsort_ctx_t *ctx) {
ctx->bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
ctx->bucket_B = NULL;
if (ctx->bucket_A) {
ctx->bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
if (ctx->bucket_B)
return 0;
}
divsufsort_destroy(ctx);
return -1;
}
/**
* Destroy suffix array context
*
* @param ctx suffix array context to destroy
*/
void divsufsort_destroy(divsufsort_ctx_t *ctx) {
if (ctx->bucket_B) {
free(ctx->bucket_B);
ctx->bucket_B = NULL;
}
if (ctx->bucket_A) {
free(ctx->bucket_A);
ctx->bucket_A = NULL;
}
}
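The point of the new context is that the two bucket arrays are allocated once and then reused for every block (lzsa keeps the context inside its compressor state, see pCompressor->divsufsort_context in matchfinder.c). A minimal usage sketch with a hypothetical wrapper:
/* Sketch: build the suffix array of one input block with the new API. */
static saint_t build_suffix_array_once(const sauchar_t *T, saidx_t *SA, saidx_t n) {
    divsufsort_ctx_t ctx;
    saint_t err;
    if (divsufsort_init(&ctx) != 0)
        return -2;                 /* bucket allocation failed */
    err = divsufsort_build_array(&ctx, T, SA, n);
    divsufsort_destroy(&ctx);      /* a real caller would keep ctx alive across blocks */
    return err;
}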
/*- Function -*/
saint_t
divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) {
saidx_t *bucket_A, *bucket_B;
divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n) {
saidx_t m;
saint_t err = 0;
@ -341,20 +377,14 @@ divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) {
else if(n == 1) { SA[0] = 0; return 0; }
else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
/* Suffixsort. */
if((bucket_A != NULL) && (bucket_B != NULL)) {
m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
construct_SA(T, SA, bucket_A, bucket_B, n, m);
if((ctx->bucket_A != NULL) && (ctx->bucket_B != NULL)) {
m = sort_typeBstar(T, SA, ctx->bucket_A, ctx->bucket_B, n);
construct_SA(T, SA, ctx->bucket_A, ctx->bucket_B, n, m);
} else {
err = -2;
}
free(bucket_B);
free(bucket_A);
return err;
}

1110
src/lzsa.c Executable file

File diff suppressed because it is too large

View File

@ -1,755 +0,0 @@
/*
* main.c - command line compression utility for the LZSA format
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#include <sys/timeb.h>
#else
#include <sys/time.h>
#endif
#include "format.h"
#include "shrink.h"
#include "expand.h"
#define BLOCK_SIZE 65536
#define OPT_VERBOSE 1
#define OPT_RAW 2
/*---------------------------------------------------------------------------*/
static long long lzsa_get_time() {
long long nTime;
#ifdef _WIN32
struct _timeb tb;
_ftime(&tb);
nTime = ((long long)tb.time * 1000LL + (long long)tb.millitm) * 1000LL;
#else
struct timeval tm;
gettimeofday(&tm, NULL);
nTime = (long long)tm.tv_sec * 1000000LL + (long long)tm.tv_usec;
#endif
return nTime;
}
/*---------------------------------------------------------------------------*/
static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, const unsigned int nOptions) {
FILE *f_in, *f_out;
unsigned char *pInData, *pOutData;
lsza_compressor compressor;
long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
int nResult;
bool bError = false;
f_in = fopen(pszInFilename, "rb");
if (!f_in) {
fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
return 100;
}
f_out = fopen(pszOutFilename, "wb");
if (!f_out) {
fprintf(stderr, "error opening '%s' for writing\n", pszOutFilename);
return 100;
}
pInData = (unsigned char*)malloc(BLOCK_SIZE * 2);
if (!pInData) {
fclose(f_out);
f_out = NULL;
fclose(f_in);
f_in = NULL;
fprintf(stderr, "out of memory\n");
return 100;
}
memset(pInData, 0, BLOCK_SIZE * 2);
pOutData = (unsigned char*)malloc(BLOCK_SIZE);
if (!pOutData) {
free(pInData);
pInData = NULL;
fclose(f_out);
f_out = NULL;
fclose(f_in);
f_in = NULL;
fprintf(stderr, "out of memory\n");
return 100;
}
memset(pOutData, 0, BLOCK_SIZE);
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2);
if (nResult != 0) {
free(pOutData);
pOutData = NULL;
free(pInData);
pInData = NULL;
fclose(f_out);
f_out = NULL;
fclose(f_in);
f_in = NULL;
fprintf(stderr, "error initializing compressor\n");
return 100;
}
if ((nOptions & OPT_RAW) == 0) {
unsigned char cHeader[3];
cHeader[0] = 0x7b; /* Magic number: 0x9e7b */
cHeader[1] = 0x9e;
cHeader[2] = 0; /* Format version 1 */
bError = fwrite(cHeader, 1, 3, f_out) != 3;
nCompressedSize += 3LL;
}
if (nOptions & OPT_VERBOSE) {
nStartTime = lzsa_get_time();
}
int nPreviousBlockSize = 0;
while (!feof(f_in) && !bError) {
int nInDataSize;
if (nPreviousBlockSize) {
memcpy(pInData, pInData + BLOCK_SIZE, nPreviousBlockSize);
}
nInDataSize = (int)fread(pInData + BLOCK_SIZE, 1, BLOCK_SIZE, f_in);
if (nInDataSize > 0) {
if (nPreviousBlockSize && (nOptions & OPT_RAW) != 0) {
fprintf(stderr, "error: raw blocks can only be used with files <= 64 Kb\n");
bError = true;
break;
}
int nOutDataSize;
nOutDataSize = lzsa_shrink_block(&compressor, pInData + BLOCK_SIZE - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutData, (nInDataSize >= BLOCK_SIZE) ? BLOCK_SIZE : nInDataSize);
if (nOutDataSize >= 0) {
/* Write compressed block */
if ((nOptions & OPT_RAW) == 0) {
unsigned char cBlockSize[3];
cBlockSize[0] = nOutDataSize & 0xff;
cBlockSize[1] = (nOutDataSize >> 8) & 0xff;
cBlockSize[2] = (nOutDataSize >> 16) & 0xff;
if (fwrite(cBlockSize, 1, 3, f_out) != (size_t)3) {
bError = true;
}
}
if (!bError) {
if (fwrite(pOutData, 1, (size_t)nOutDataSize, f_out) != (size_t)nOutDataSize) {
bError = true;
}
else {
nOriginalSize += (long long)nInDataSize;
nCompressedSize += 3LL + (long long)nOutDataSize;
}
}
}
else {
/* Write uncompressible, literal block */
if ((nOptions & OPT_RAW) != 0) {
fprintf(stderr, "error: data is incompressible, raw blocks only support compressed data\n");
bError = true;
break;
}
unsigned char cBlockSize[3];
cBlockSize[0] = nInDataSize & 0xff;
cBlockSize[1] = (nInDataSize >> 8) & 0xff;
cBlockSize[2] = ((nInDataSize >> 16) & 0x7f) | 0x80; /* Uncompressed block */
if (fwrite(cBlockSize, 1, 3, f_out) != (size_t)3) {
bError = true;
}
else {
if (fwrite(pInData + BLOCK_SIZE, 1, (size_t)nInDataSize, f_out) != (size_t)nInDataSize) {
bError = true;
}
else {
nOriginalSize += (long long)nInDataSize;
nCompressedSize += 3LL + (long long)nInDataSize;
}
}
}
nPreviousBlockSize = nInDataSize;
}
if (!bError && !feof(f_in) && nOriginalSize >= 1024 * 1024) {
fprintf(stdout, "\r%lld => %lld (%g %%)", nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
}
}
unsigned char cFooter[4];
int nFooterSize;
if ((nOptions & OPT_RAW) != 0) {
cFooter[0] = 0x00; /* EOD marker for raw block */
cFooter[1] = 0xff;
cFooter[2] = 0x00;
cFooter[3] = 0x00;
nFooterSize = 4;
}
else {
cFooter[0] = 0x00; /* EOD frame */
cFooter[1] = 0x00;
cFooter[2] = 0x00;
nFooterSize = 3;
}
if (!bError)
bError = fwrite(cFooter, 1, nFooterSize, f_out) != nFooterSize;
nCompressedSize += (long long)nFooterSize;
if (!bError && (nOptions & OPT_VERBOSE)) {
nEndTime = lzsa_get_time();
double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens, %lld into %lld bytes ==> %g %%\n",
pszInFilename, fDelta, fSpeed, lzsa_compressor_get_command_count(&compressor), nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
}
lzsa_compressor_destroy(&compressor);
free(pOutData);
pOutData = NULL;
free(pInData);
pInData = NULL;
fclose(f_out);
f_out = NULL;
fclose(f_in);
f_in = NULL;
if (bError) {
fprintf(stderr, "\rcompression error for '%s'\n", pszInFilename);
return 100;
}
else {
return 0;
}
}
/*---------------------------------------------------------------------------*/
static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename, const unsigned int nOptions) {
long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL;
unsigned int nFileSize = 0;
FILE *pInFile = fopen(pszInFilename, "rb");
if (!pInFile) {
fprintf(stderr, "error opening input file\n");
return 100;
}
if ((nOptions & OPT_RAW) == 0) {
unsigned char cHeader[3];
memset(cHeader, 0, 3);
if (fread(cHeader, 1, 3, pInFile) != 3) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error reading header in input file\n");
return 100;
}
if (cHeader[0] != 0x7b ||
cHeader[1] != 0x9e ||
cHeader[2] != 0) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "invalid magic number or format version in input file\n");
return 100;
}
}
else {
fseek(pInFile, 0, SEEK_END);
nFileSize = (unsigned int)ftell(pInFile);
fseek(pInFile, 0, SEEK_SET);
if (nFileSize < 4) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "invalid file size for raw block mode\n");
return 100;
}
}
FILE *pOutFile = fopen(pszOutFilename, "wb");
if (!pOutFile) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening output file\n");
return 100;
}
unsigned char *pInBlock;
unsigned char *pOutData;
pInBlock = (unsigned char*)malloc(BLOCK_SIZE);
if (!pInBlock) {
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening output file\n");
return 100;
}
pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2);
if (!pOutData) {
free(pInBlock);
pInBlock = NULL;
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening output file\n");
return 100;
}
if (nOptions & OPT_VERBOSE) {
nStartTime = lzsa_get_time();
}
int nDecompressionError = 0;
int nPrevDecompressedSize = 0;
while (!feof(pInFile) && !nDecompressionError) {
unsigned char cBlockSize[3];
unsigned int nBlockSize = 0;
if (nPrevDecompressedSize != 0) {
memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize);
}
if ((nOptions & OPT_RAW) == 0) {
if (fread(cBlockSize, 1, 3, pInFile) == 3) {
nBlockSize = ((unsigned int)cBlockSize[0]) |
(((unsigned int)cBlockSize[1]) << 8) |
(((unsigned int)cBlockSize[2]) << 16);
}
else {
nBlockSize = 0;
}
}
else {
nBlockSize = nFileSize - 4;
nFileSize = 0;
}
if (nBlockSize != 0) {
bool bIsUncompressed = (nBlockSize & 0x800000) != 0;
int nDecompressedSize = 0;
nBlockSize &= 0x7fffff;
if (fread(pInBlock, 1, nBlockSize, pInFile) == nBlockSize) {
if (bIsUncompressed) {
memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize);
nDecompressedSize = nBlockSize;
}
else {
unsigned int nBlockOffs = 0;
nDecompressedSize = lzsa_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
if (nDecompressedSize < 0) {
nDecompressionError = nDecompressedSize;
break;
}
}
if (nDecompressedSize != 0) {
nOriginalSize += (long long)nDecompressedSize;
fwrite(pOutData + BLOCK_SIZE, 1, nDecompressedSize, pOutFile);
nPrevDecompressedSize = nDecompressedSize;
nDecompressedSize = 0;
}
}
else {
break;
}
}
else {
break;
}
}
free(pOutData);
pOutData = NULL;
free(pInBlock);
pInBlock = NULL;
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
if (nDecompressionError) {
fprintf(stderr, "decompression error for '%s'\n", pszInFilename);
return 100;
}
else {
if (nOptions & OPT_VERBOSE) {
nEndTime = lzsa_get_time();
double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
fprintf(stdout, "Decompressed '%s' in %g seconds, %g Mb/s\n",
pszInFilename, fDelta, fSpeed);
}
return 0;
}
}
static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, const unsigned int nOptions) {
long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL;
long long nKnownGoodSize = 0LL;
unsigned int nFileSize = 0;
FILE *pInFile = fopen(pszInFilename, "rb");
if (!pInFile) {
fprintf(stderr, "error opening compressed input file\n");
return 100;
}
if ((nOptions & OPT_RAW) == 0) {
unsigned char cHeader[3];
memset(cHeader, 0, 3);
if (fread(cHeader, 1, 3, pInFile) != 3) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error reading header in compressed input file\n");
return 100;
}
if (cHeader[0] != 0x7b ||
cHeader[1] != 0x9e ||
cHeader[2] != 0) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "invalid magic number or format version in input file\n");
return 100;
}
}
else {
fseek(pInFile, 0, SEEK_END);
nFileSize = (unsigned int)ftell(pInFile);
fseek(pInFile, 0, SEEK_SET);
if (nFileSize < 4) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "invalid file size for raw block mode\n");
return 100;
}
}
FILE *pOutFile = fopen(pszOutFilename, "rb");
if (!pOutFile) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening original uncompressed file\n");
return 100;
}
unsigned char *pInBlock;
unsigned char *pOutData;
unsigned char *pCompareData;
pInBlock = (unsigned char*)malloc(BLOCK_SIZE);
if (!pInBlock) {
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening output file\n");
return 100;
}
pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2);
if (!pOutData) {
free(pInBlock);
pInBlock = NULL;
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening output file\n");
return 100;
}
pCompareData = (unsigned char*)malloc(BLOCK_SIZE);
if (!pCompareData) {
free(pOutData);
pOutData = NULL;
free(pInBlock);
pInBlock = NULL;
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening output file\n");
return 100;
}
if (nOptions & OPT_VERBOSE) {
nStartTime = lzsa_get_time();
}
int nDecompressionError = 0;
bool bComparisonError = false;
int nPrevDecompressedSize = 0;
while (!feof(pInFile) && !nDecompressionError && !bComparisonError) {
unsigned int nBlockSize = 0;
if (nPrevDecompressedSize != 0) {
memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize);
}
int nBytesToCompare = (int)fread(pCompareData, 1, BLOCK_SIZE, pOutFile);
if ((nOptions & OPT_RAW) == 0) {
unsigned char cBlockSize[3];
if (fread(cBlockSize, 1, 3, pInFile) == 3) {
nBlockSize = ((unsigned int)cBlockSize[0]) |
(((unsigned int)cBlockSize[1]) << 8) |
(((unsigned int)cBlockSize[2]) << 16);
}
else {
nBlockSize = 0;
}
}
else {
nBlockSize = nFileSize - 4;
nFileSize = 0;
}
if (nBlockSize != 0) {
bool bIsUncompressed = (nBlockSize & 0x800000) != 0;
int nDecompressedSize = 0;
nBlockSize &= 0x7fffff;
if (fread(pInBlock, 1, nBlockSize, pInFile) == nBlockSize) {
if (bIsUncompressed) {
memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize);
nDecompressedSize = nBlockSize;
}
else {
unsigned int nBlockOffs = 0;
nDecompressedSize = lzsa_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
if (nDecompressedSize < 0) {
nDecompressionError = nDecompressedSize;
break;
}
}
if (nDecompressedSize == nBytesToCompare) {
nKnownGoodSize = nOriginalSize;
nOriginalSize += (long long)nDecompressedSize;
if (memcmp(pOutData + BLOCK_SIZE, pCompareData, nBytesToCompare))
bComparisonError = true;
nPrevDecompressedSize = nDecompressedSize;
nDecompressedSize = 0;
}
else {
bComparisonError = true;
break;
}
}
else {
break;
}
}
else {
break;
}
}
free(pCompareData);
pCompareData = NULL;
free(pOutData);
pOutData = NULL;
free(pInBlock);
pInBlock = NULL;
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
if (nDecompressionError) {
fprintf(stderr, "decompression error for '%s'\n", pszInFilename);
return 100;
}
else if (bComparisonError) {
fprintf(stderr, "error comparing compressed file '%s' with original '%s' starting at %lld\n", pszInFilename, pszOutFilename, nKnownGoodSize);
return 100;
}
else {
if (nOptions & OPT_VERBOSE) {
nEndTime = lzsa_get_time();
double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
fprintf(stdout, "Compared '%s' in %g seconds, %g Mb/s\n",
pszInFilename, fDelta, fSpeed);
}
return 0;
}
}
/*---------------------------------------------------------------------------*/
int main(int argc, char **argv) {
int i;
const char *pszInFilename = NULL;
const char *pszOutFilename = NULL;
bool bArgsError = false;
bool bCommandDefined = false;
bool bVerifyCompression = false;
char cCommand = 'z';
unsigned int nOptions = 0;
for (i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-d")) {
if (!bCommandDefined) {
bCommandDefined = true;
cCommand = 'd';
}
else
bArgsError = true;
}
else if (!strcmp(argv[i], "-z")) {
if (!bCommandDefined) {
bCommandDefined = true;
cCommand = 'z';
}
else
bArgsError = true;
}
else if (!strcmp(argv[i], "-c")) {
if (!bVerifyCompression) {
bVerifyCompression = true;
}
else
bArgsError = true;
}
else if (!strcmp(argv[i], "-v")) {
if ((nOptions & OPT_VERBOSE) == 0) {
nOptions |= OPT_VERBOSE;
}
else
bArgsError = true;
}
else if (!strcmp(argv[i], "-r")) {
if ((nOptions & OPT_RAW) == 0) {
nOptions |= OPT_RAW;
}
else
bArgsError = true;
}
else {
if (!pszInFilename)
pszInFilename = argv[i];
else {
if (!pszOutFilename)
pszOutFilename = argv[i];
else
bArgsError = true;
}
}
}
if (bArgsError || !pszInFilename || !pszOutFilename) {
fprintf(stderr, "usage: %s [-c] [-d] [-v] [-r] <infile> <outfile>\n", argv[0]);
fprintf(stderr, " -c: check resulting stream after compressing\n");
fprintf(stderr, " -d: decompress (default: compress)\n");
fprintf(stderr, " -v: be verbose\n");
fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n");
return 100;
}
if (cCommand == 'z') {
int nResult = lzsa_compress(pszInFilename, pszOutFilename, nOptions);
if (nResult == 0 && bVerifyCompression) {
nResult = lzsa_compare(pszOutFilename, pszInFilename, nOptions);
}
return nResult;
}
else if (cCommand == 'd') {
return lzsa_decompress(pszInFilename, pszOutFilename, nOptions);
}
else {
return 100;
}
}

361
src/matchfinder.c Normal file
View File

@ -0,0 +1,361 @@
/*
* matchfinder.c - LZ match finder implementation
*
* The following copying information applies to this specific source code file:
*
* Written in 2019 by Emmanuel Marty <marty.emmanuel@gmail.com>
* Portions written in 2014-2015 by Eric Biggers <ebiggers3@gmail.com>
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide via the Creative Commons Zero 1.0 Universal Public Domain
* Dedication (the "CC0").
*
* This software is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the CC0 for more details.
*
* You should have received a copy of the CC0 along with this software; if not
* see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "matchfinder.h"
#include "format.h"
#include "lib.h"
/**
* Hash index into TAG_BITS
*
* @param nIndex index value
*
* @return hash
*/
static inline int lzsa_get_index_tag(unsigned int nIndex) {
return (int)(((unsigned long long)nIndex * 11400714819323198485ULL) >> (64ULL - TAG_BITS));
}
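/*
 * Added note (not part of the original source): this is Fibonacci hashing.
 * The constant 11400714819323198485 is approximately 2^64 divided by the
 * golden ratio, so multiplying the index by it and keeping only the top
 * TAG_BITS bits spreads consecutive index values evenly across the tag space.
 * For illustration, if TAG_BITS were 4, indices 1, 2 and 3 would map to
 * tags 9, 3 and 13. */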
/**
* Parse input data, build suffix array and overlaid data structures to speed up match finding
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
*
* @return 0 for success, non-zero for failure
*/
int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
unsigned int *intervals = pCompressor->intervals;
/* Build suffix array from input data */
if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) {
return 100;
}
int *PLCP = (int*)pCompressor->pos_data; /* Use temporarily */
int *Phi = PLCP;
int nCurLen = 0;
int i, r;
/* Compute the permuted LCP first (Kärkkäinen method) */
Phi[intervals[0]] = -1;
for (i = 1; i < nInWindowSize; i++)
Phi[intervals[i]] = intervals[i - 1];
for (i = 0; i < nInWindowSize; i++) {
if (Phi[i] == -1) {
PLCP[i] = 0;
continue;
}
int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);
while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++;
PLCP[i] = nCurLen;
if (nCurLen > 0)
nCurLen--;
}
/* Rotate permuted LCP into the LCP. This has better cache locality than the direct Kasai LCP method. This also
* saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method,
* and the interval builder below doesn't need it either. */
intervals[0] &= POS_MASK;
int nMinMatchSize = pCompressor->min_match_size;
if (pCompressor->format_version >= 2) {
for (i = 1; i < nInWindowSize; i++) {
int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
if (nLen < nMinMatchSize)
nLen = 0;
if (nLen > LCP_MAX)
nLen = LCP_MAX;
int nTaggedLen = 0;
if (nLen)
nTaggedLen = (nLen << TAG_BITS) | (lzsa_get_index_tag((unsigned int)nIndex) & ((1 << TAG_BITS) - 1));
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nTaggedLen) << LCP_SHIFT);
}
}
else {
for (i = 1; i < nInWindowSize; i++) {
int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
if (nLen < nMinMatchSize)
nLen = 0;
if (nLen > LCP_AND_TAG_MAX)
nLen = LCP_AND_TAG_MAX;
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nLen) << LCP_SHIFT);
}
}
/**
* Build intervals for finding matches
*
* Methodology and code fragment taken from wimlib (CC0 license):
* https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
*/
unsigned int * const SA_and_LCP = intervals;
unsigned int *pos_data = pCompressor->pos_data;
unsigned int next_interval_idx;
unsigned int *top = pCompressor->open_intervals;
unsigned int prev_pos = SA_and_LCP[0] & POS_MASK;
*top = 0;
intervals[0] = 0;
next_interval_idx = 1;
for (r = 1; r < nInWindowSize; r++) {
const unsigned int next_pos = SA_and_LCP[r] & POS_MASK;
const unsigned int next_lcp = SA_and_LCP[r] & LCP_MASK;
const unsigned int top_lcp = *top & LCP_MASK;
if (next_lcp == top_lcp) {
/* Continuing the deepest open interval */
pos_data[prev_pos] = *top;
}
else if (next_lcp > top_lcp) {
/* Opening a new interval */
*++top = next_lcp | next_interval_idx++;
pos_data[prev_pos] = *top;
}
else {
/* Closing the deepest open interval */
pos_data[prev_pos] = *top;
for (;;) {
const unsigned int closed_interval_idx = *top-- & POS_MASK;
const unsigned int superinterval_lcp = *top & LCP_MASK;
if (next_lcp == superinterval_lcp) {
/* Continuing the superinterval */
intervals[closed_interval_idx] = *top;
break;
}
else if (next_lcp > superinterval_lcp) {
/* Creating a new interval that is a
* superinterval of the one being
* closed, but still a subinterval of
* its superinterval */
*++top = next_lcp | next_interval_idx++;
intervals[closed_interval_idx] = *top;
break;
}
else {
/* Also closing the superinterval */
intervals[closed_interval_idx] = *top;
}
}
}
prev_pos = next_pos;
}
/* Close any still-open intervals. */
pos_data[prev_pos] = *top;
for (; top > pCompressor->open_intervals; top--)
intervals[*top & POS_MASK] = *(top - 1);
/* Success */
return 0;
}
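/*
 * Added note (not part of the original source): each reference value produced
 * here keeps an index in its low LCP_SHIFT bits and the clamped LCP length
 * (tag-extended for format v2) in its high bits, which is why
 * lzsa_find_matches_at() recovers match lengths with ref >> (LCP_SHIFT + TAG_BITS)
 * for format v2 and ref >> LCP_SHIFT for format v1. The LCP_SHIFT, TAG_BITS and
 * POS_MASK constants come from the headers included above. */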
/**
* Find matches at the specified offset in the input window
*
* @param pCompressor compression context
* @param nOffset offset to find matches at, in the input window
* @param pMatches pointer to returned matches
* @param nMaxMatches maximum number of matches to return (0 for none)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
*
* @return number of matches
*/
int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches, const int nInWindowSize) {
unsigned int *intervals = pCompressor->intervals;
unsigned int *pos_data = pCompressor->pos_data;
unsigned int ref;
unsigned int super_ref;
unsigned int match_pos;
lzsa_match *matchptr;
int nPrevOffset = 0;
/**
* Find matches using intervals
*
* Taken from wimlib (CC0 license):
* https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
*/
/* Get the deepest lcp-interval containing the current suffix. */
ref = pos_data[nOffset];
pos_data[nOffset] = 0;
/* Ascend until we reach a visited interval, the root, or a child of the
* root. Link unvisited intervals to the current suffix as we go. */
while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) {
intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
ref = super_ref;
}
if (super_ref == 0) {
/* In this case, the current interval may be any of:
* (1) the root;
* (2) an unvisited child of the root */
if (ref != 0) /* Not the root? */
intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
return 0;
}
/* Ascend indirectly via pos_data[] links. */
match_pos = super_ref & EXCL_VISITED_MASK;
matchptr = pMatches;
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET) {
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
matchptr->offset = (unsigned short)nMatchOffset;
matchptr++;
nPrevOffset = nMatchOffset;
}
}
}
for (;;) {
if ((super_ref = pos_data[match_pos]) > ref) {
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
matchptr->length = ((unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
matchptr->offset = (unsigned short)nMatchOffset;
matchptr++;
nPrevOffset = nMatchOffset;
}
}
}
}
while ((super_ref = pos_data[match_pos]) > ref)
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
pos_data[match_pos] = ref;
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
if (pCompressor->format_version >= 2) {
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
}
else {
matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
}
matchptr->offset = (unsigned short)nMatchOffset;
matchptr++;
}
}
if (super_ref == 0)
break;
ref = super_ref;
match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK;
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
matchptr->length = ((unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
matchptr->offset = (unsigned short)nMatchOffset;
if ((matchptr->length & 0x7fff) > 2) {
matchptr++;
nPrevOffset = nMatchOffset;
}
}
}
}
}
return (int)(matchptr - pMatches);
}
/**
* Skip previously compressed bytes
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically 0)
* @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
*/
void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
lzsa_match match;
int i;
/* Skipping still requires scanning for matches, as this also performs a lazy update of the intervals. However,
* we don't store the matches. */
for (i = nStartOffset; i < nEndOffset; i++) {
lzsa_find_matches_at(pCompressor, i, &match, 0, 0);
}
}
/**
* Find all matches for the data to be compressed
*
* @param pCompressor compression context
* @param nMatchesPerOffset maximum number of matches to store for each offset
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*/
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset) {
lzsa_match *pMatch = pCompressor->match;
int i;
for (i = nStartOffset; i < nEndOffset; i++) {
int nMatches = lzsa_find_matches_at(pCompressor, i, pMatch, nMatchesPerOffset, nEndOffset - nStartOffset);
while (nMatches < nMatchesPerOffset) {
pMatch[nMatches].length = 0;
pMatch[nMatches].offset = 0;
nMatches++;
}
pMatch += nMatchesPerOffset;
}
}

91
src/matchfinder.h Normal file

@@ -0,0 +1,91 @@
/*
* matchfinder.h - LZ match finder definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _MATCHFINDER_H
#define _MATCHFINDER_H
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declarations */
typedef struct _lzsa_match lzsa_match;
typedef struct _lzsa_compressor lzsa_compressor;
/**
* Parse input data, build suffix array and overlaid data structures to speed up match finding
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
*
* @return 0 for success, non-zero for failure
*/
int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
/**
* Find matches at the specified offset in the input window
*
* @param pCompressor compression context
* @param nOffset offset to find matches at, in the input window
* @param pMatches pointer to returned matches
* @param nMaxMatches maximum number of matches to return (0 for none)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
*
* @return number of matches
*/
int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches, const int nInWindowSize);
/**
* Skip previously compressed bytes
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically 0)
* @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
*/
void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
/**
* Find all matches for the data to be compressed
*
* @param pCompressor compression context
* @param nMatchesPerOffset maximum number of matches to store for each offset
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*/
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset);
#ifdef __cplusplus
}
#endif
#endif /* _MATCHFINDER_H */
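The following usage sketch is added for illustration only; it is not part of this changeset, and the driver function name and parameters are invented. It shows the order in which the block compressors are expected to call this API, mirroring the flow of the former lzsa_shrink_block():

#include "matchfinder.h"

/* Hypothetical driver for the match finder API declared above. */
static void sketch_find_matches(lzsa_compressor *pCompressor,
                                const unsigned char *pInWindow,
                                const int nPreviousBlockSize,
                                const int nInDataSize,
                                const int nMatchesPerOffset) {
   const int nInWindowSize = nPreviousBlockSize + nInDataSize;

   /* Build the suffix array and the overlaid LCP interval structures */
   if (lzsa_build_suffix_array(pCompressor, pInWindow, nInWindowSize) != 0)
      return;

   /* Walk over already-compressed bytes without storing matches, so that the
    * intervals still receive their lazy updates */
   if (nPreviousBlockSize)
      lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);

   /* Store up to nMatchesPerOffset candidate matches for every position that
    * remains to be compressed */
   lzsa_find_all_matches(pCompressor, nMatchesPerOffset,
                         nPreviousBlockSize, nInWindowSize);
}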


@@ -1,721 +0,0 @@
/*
* shrink.c - block compressor implementation
*
* The following copying information applies to this specific source code file:
*
* Written in 2019 by Emmanuel Marty <marty.emmanuel@gmail.com>
* Portions written in 2014-2015 by Eric Biggers <ebiggers3@gmail.com>
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide via the Creative Commons Zero 1.0 Universal Public Domain
* Dedication (the "CC0").
*
* This software is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the CC0 for more details.
*
* You should have received a copy of the CC0 along with this software; if not
* see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "divsufsort.h"
#include "shrink.h"
#include "format.h"
#define LCP_BITS 14
#define LCP_MAX ((1<<LCP_BITS) - 1)
#define LCP_SHIFT (32-LCP_BITS)
#define LCP_MASK (LCP_MAX << LCP_SHIFT)
#define POS_MASK ((1<<LCP_SHIFT) - 1)
#define NMATCHES_PER_OFFSET 8
#define MATCHES_PER_OFFSET_SHIFT 3
#define LEAVE_ALONE_MATCH_SIZE 1000
#define LAST_MATCH_OFFSET 4
#define LAST_LITERALS 1
/** One match */
typedef struct _lzsa_match {
unsigned short length;
unsigned short offset;
} lzsa_match;
/**
* Initialize compression context
*
* @param pCompressor compression context to initialize
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
*
* @return 0 for success, non-zero for failure
*/
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize) {
pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
pCompressor->pos_data = NULL;
pCompressor->open_intervals = NULL;
pCompressor->match = NULL;
pCompressor->num_commands = 0;
if (pCompressor->intervals) {
pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
if (pCompressor->pos_data) {
pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int));
if (pCompressor->open_intervals) {
pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
if (pCompressor->match)
return 0;
free(pCompressor->open_intervals);
pCompressor->open_intervals = NULL;
}
free(pCompressor->pos_data);
pCompressor->pos_data = NULL;
}
free(pCompressor->intervals);
pCompressor->intervals = NULL;
}
return 100;
}
/**
* Clean up compression context and free up any associated resources
*
* @param pCompressor compression context to clean up
*/
void lzsa_compressor_destroy(lsza_compressor *pCompressor) {
if (pCompressor->match) {
free(pCompressor->match);
pCompressor->match = NULL;
}
if (pCompressor->open_intervals) {
free(pCompressor->open_intervals);
pCompressor->open_intervals = NULL;
}
if (pCompressor->pos_data) {
free(pCompressor->pos_data);
pCompressor->pos_data = NULL;
}
if (pCompressor->intervals) {
free(pCompressor->intervals);
pCompressor->intervals = NULL;
}
}
/**
* Parse input data, build suffix array and overlaid data structures to speed up match finding
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
*
* @return 0 for success, non-zero for failure
*/
static int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
unsigned int *intervals = pCompressor->intervals;
/* Build suffix array from input data */
if (divsufsort(pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) {
return 100;
}
int *PLCP = (int*)pCompressor->pos_data; /* Use temporarily */
int *Phi = PLCP;
int nCurLen = 0;
int i;
/* Compute the permuted LCP first (Kärkkäinen method) */
Phi[intervals[0]] = -1;
for (i = 1; i < nInWindowSize; i++)
Phi[intervals[i]] = intervals[i - 1];
for (i = 0; i < nInWindowSize; i++) {
if (Phi[i] == -1) {
PLCP[i] = 0;
continue;
}
int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);
while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++;
PLCP[i] = nCurLen;
if (nCurLen > 0)
nCurLen--;
}
/* Rotate permuted LCP into the LCP. This has better cache locality than the direct Kasai LCP method. This also
* saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method,
* and the interval builder below doesn't need it either. */
intervals[0] &= POS_MASK;
for (i = 1; i < nInWindowSize - 1; i++) {
int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
if (nLen < MIN_MATCH_SIZE)
nLen = 0;
if (nLen > LCP_MAX)
nLen = LCP_MAX;
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nLen) << LCP_SHIFT);
}
if (i < nInWindowSize)
intervals[i] &= POS_MASK;
/**
* Build intervals for finding matches
*
* Methodology and code fragment taken from wimlib (CC0 license):
* https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
*/
unsigned int * const SA_and_LCP = intervals;
unsigned int *pos_data = pCompressor->pos_data;
unsigned int next_interval_idx;
unsigned int *top = pCompressor->open_intervals;
unsigned int prev_pos = SA_and_LCP[0] & POS_MASK;
*top = 0;
intervals[0] = 0;
next_interval_idx = 1;
for (int r = 1; r < nInWindowSize; r++) {
const unsigned int next_pos = SA_and_LCP[r] & POS_MASK;
const unsigned int next_lcp = SA_and_LCP[r] & LCP_MASK;
const unsigned int top_lcp = *top & LCP_MASK;
if (next_lcp == top_lcp) {
/* Continuing the deepest open interval */
pos_data[prev_pos] = *top;
}
else if (next_lcp > top_lcp) {
/* Opening a new interval */
*++top = next_lcp | next_interval_idx++;
pos_data[prev_pos] = *top;
}
else {
/* Closing the deepest open interval */
pos_data[prev_pos] = *top;
for (;;) {
const unsigned int closed_interval_idx = *top-- & POS_MASK;
const unsigned int superinterval_lcp = *top & LCP_MASK;
if (next_lcp == superinterval_lcp) {
/* Continuing the superinterval */
intervals[closed_interval_idx] = *top;
break;
}
else if (next_lcp > superinterval_lcp) {
/* Creating a new interval that is a
* superinterval of the one being
* closed, but still a subinterval of
* its superinterval */
*++top = next_lcp | next_interval_idx++;
intervals[closed_interval_idx] = *top;
break;
}
else {
/* Also closing the superinterval */
intervals[closed_interval_idx] = *top;
}
}
}
prev_pos = next_pos;
}
/* Close any still-open intervals. */
pos_data[prev_pos] = *top;
for (; top > pCompressor->open_intervals; top--)
intervals[*top & POS_MASK] = *(top - 1);
/* Success */
return 0;
}
/**
* Find matches at the specified offset in the input window
*
* @param pCompressor compression context
* @param nOffset offset to find matches at, in the input window
* @param pMatches pointer to returned matches
* @param nMaxMatches maximum number of matches to return (0 for none)
*
* @return number of matches
*/
static int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) {
unsigned int *intervals = pCompressor->intervals;
unsigned int *pos_data = pCompressor->pos_data;
unsigned int ref;
unsigned int super_ref;
unsigned int match_pos;
lzsa_match *matchptr;
/**
* Find matches using intervals
*
* Taken from wimlib (CC0 license):
* https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
*/
/* Get the deepest lcp-interval containing the current suffix. */
ref = pos_data[nOffset];
pos_data[nOffset] = 0;
/* Ascend until we reach a visited interval, the root, or a child of the
* root. Link unvisited intervals to the current suffix as we go. */
while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) {
intervals[ref & POS_MASK] = nOffset;
ref = super_ref;
}
if (super_ref == 0) {
/* In this case, the current interval may be any of:
* (1) the root;
* (2) an unvisited child of the root;
* (3) an interval last visited by suffix 0
*
* We could avoid the ambiguity with (3) by using an lcp
* placeholder value other than 0 to represent "visited", but
* it's fastest to use 0. So we just don't allow matches with
* position 0. */
if (ref != 0) /* Not the root? */
intervals[ref & POS_MASK] = nOffset;
return 0;
}
/* Ascend indirectly via pos_data[] links. */
match_pos = super_ref;
matchptr = pMatches;
for (;;) {
while ((super_ref = pos_data[match_pos]) > ref)
match_pos = intervals[super_ref & POS_MASK];
intervals[ref & POS_MASK] = nOffset;
pos_data[match_pos] = ref;
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET) {
matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
matchptr->offset = (unsigned short)nMatchOffset;
matchptr++;
}
}
if (super_ref == 0)
break;
ref = super_ref;
match_pos = intervals[ref & POS_MASK];
}
return (int)(matchptr - pMatches);
}
/**
* Skip previously compressed bytes
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically 0)
* @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
*/
static void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
lzsa_match match;
int i;
/* Skipping still requires scanning for matches, as this also performs a lazy update of the intervals. However,
* we don't store the matches. */
for (i = nStartOffset; i < nEndOffset; i++) {
lzsa_find_matches_at(pCompressor, i, &match, 0);
}
}
/**
* Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for
* the optimizer to look at.
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*/
static void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
lzsa_match *pMatch = pCompressor->match + (nStartOffset << MATCHES_PER_OFFSET_SHIFT);
int i;
for (i = nStartOffset; i < nEndOffset; i++) {
int nMatches = lzsa_find_matches_at(pCompressor, i, pMatch, NMATCHES_PER_OFFSET);
int m;
for (m = 0; m < NMATCHES_PER_OFFSET; m++) {
if (nMatches <= m || i > (nEndOffset - LAST_MATCH_OFFSET)) {
pMatch->length = 0;
pMatch->offset = 0;
}
else {
int nMaxLen = (nEndOffset - LAST_LITERALS) - i;
if (nMaxLen < 0)
nMaxLen = 0;
if (pMatch->length > nMaxLen)
pMatch->length = (unsigned short)nMaxLen;
}
pMatch++;
}
}
}
/**
* Get the number of extra bytes required to represent a literals length
*
* @param nLength literals length
*
* @return number of extra bytes required
*/
static inline int lzsa_get_literals_varlen_size(const int nLength) {
if (nLength < LITERALS_RUN_LEN) {
return 0;
}
else {
if (nLength < (LITERALS_RUN_LEN + 254))
return 1;
else {
if (nLength < (LITERALS_RUN_LEN + 510))
return 2;
else
return 3;
}
}
}
/**
* Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough
* room to write the bytes.
*
* @param pOutData pointer to output buffer
* @param nOutOffset current write index into output buffer
* @param nLength literals length
*/
static inline int lzsa_write_literals_varlen(unsigned char *pOutData, int nOutOffset, int nLength) {
if (nLength >= LITERALS_RUN_LEN) {
if (nLength < (LITERALS_RUN_LEN + 254))
pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN;
else {
if (nLength < (LITERALS_RUN_LEN + 510)) {
pOutData[nOutOffset++] = 254;
pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN - 254;
}
else {
pOutData[nOutOffset++] = 255;
pOutData[nOutOffset++] = nLength & 0xff;
pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
}
}
}
return nOutOffset;
}
/**
* Get the number of extra bytes required to represent an encoded match length
*
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE)
*
* @return number of extra bytes required
*/
static inline int lzsa_get_match_varlen_size(const int nLength) {
if (nLength < MATCH_RUN_LEN) {
return 0;
}
else {
if (nLength < (MATCH_RUN_LEN + 254))
return 1;
else {
if (nLength < (MATCH_RUN_LEN + 510))
return 2;
else
return 3;
}
}
}
/**
* Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough
* room to write the bytes.
*
* @param pOutData pointer to output buffer
* @param nOutOffset current write index into output buffer
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE)
*/
static inline int lzsa_write_match_varlen(unsigned char *pOutData, int nOutOffset, int nLength) {
if (nLength >= MATCH_RUN_LEN) {
if (nLength < (MATCH_RUN_LEN + 254))
pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN;
else {
if (nLength < (MATCH_RUN_LEN + 510)) {
pOutData[nOutOffset++] = 254;
pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN - 254;
}
else {
pOutData[nOutOffset++] = 255;
pOutData[nOutOffset++] = nLength & 0xff;
pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
}
}
}
return nOutOffset;
}
/**
* Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*/
static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
int *cost = (int*)pCompressor->pos_data; /* Reuse */
int nLastLiteralsOffset;
int i;
cost[nEndOffset - 1] = 1;
nLastLiteralsOffset = nEndOffset - 1;
for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
int nBestCost, nBestMatchLen, nBestMatchOffset;
int nLiteralsLen = nLastLiteralsOffset - i;
nBestCost = 1 + cost[i + 1] + lzsa_get_literals_varlen_size(nLiteralsLen);
nBestMatchLen = 0;
nBestMatchOffset = 0;
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
int m;
for (m = 0; m < NMATCHES_PER_OFFSET; m++) {
int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 1 : 2;
if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
int nCurCost;
int nMatchLen = pMatch[m].length;
int nRemainingLiteralsLen = nLastLiteralsOffset - (i + nMatchLen);
if (nRemainingLiteralsLen < 0) nRemainingLiteralsLen = 0;
nCurCost = 1 + lzsa_get_literals_varlen_size(nRemainingLiteralsLen) + nMatchOffsetSize + lzsa_get_match_varlen_size(nMatchLen - MIN_MATCH_SIZE);
if ((i + nMatchLen) < nEndOffset)
nCurCost += cost[i + nMatchLen];
if (nBestCost >= nCurCost) {
nBestCost = nCurCost;
nBestMatchLen = nMatchLen;
nBestMatchOffset = pMatch[m].offset;
}
}
else {
if (pMatch[m].length >= MIN_MATCH_SIZE) {
int k;
for (k = MIN_MATCH_SIZE; k <= pMatch[m].length; k++) {
int nCurCost;
int nRemainingLiteralsLen = nLastLiteralsOffset - (i + k);
if (nRemainingLiteralsLen < 0) nRemainingLiteralsLen = 0;
nCurCost = 1 + lzsa_get_literals_varlen_size(nRemainingLiteralsLen) + nMatchOffsetSize + lzsa_get_match_varlen_size(k - MIN_MATCH_SIZE);
if ((i + k) < nEndOffset)
nCurCost += cost[i + k];
if (nBestCost >= nCurCost) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
}
}
}
}
}
if (nBestMatchLen >= MIN_MATCH_SIZE)
nLastLiteralsOffset = i;
cost[i] = nBestCost;
pMatch->length = nBestMatchLen;
pMatch->offset = nBestMatchOffset;
}
}
/**
* Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
* impacting the compression ratio
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*/
static void lzsa_optimize_command_count(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
int i;
int nNumLiterals = 0;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
if (pMatch->length >= MIN_MATCH_SIZE) {
int nMatchOffset = pMatch->offset;
int nMatchLen = pMatch->length;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE;
int nNibbleLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
int nTokenSize = 1 /* nibble */ + lzsa_get_literals_varlen_size(nNumLiterals) + (nNibbleLongOffset ? 2 : 1) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
if ((((nNumLiterals + nMatchLen) < LITERALS_RUN_LEN && nTokenSize >= nMatchLen) ||
((nNumLiterals + nMatchLen) < (LITERALS_RUN_LEN + 254) && nTokenSize >= (nMatchLen + 1))) &&
(i + nMatchLen) < nEndOffset && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE) {
int j;
for (j = 0; j < nMatchLen; j++) {
pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0;
}
nNumLiterals += nMatchLen;
i += nMatchLen;
}
else {
nNumLiterals = 0;
i += nMatchLen;
}
}
else {
nNumLiterals++;
i++;
}
}
}
/**
* Emit block of compressed data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
int i;
int nNumLiterals = 0;
int nInFirstLiteralOffset = 0;
int nOutOffset = 0;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
if (pMatch->length >= MIN_MATCH_SIZE) {
int nMatchOffset = pMatch->offset;
int nMatchLen = pMatch->length;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE;
int nNibbleLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN) ? LITERALS_RUN_LEN : nNumLiterals;
int nNibbleMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN) ? MATCH_RUN_LEN : nEncodedMatchLen;
int nNibbleLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
int nTokenSize = 1 /* nibble */ + lzsa_get_literals_varlen_size(nNumLiterals) + nNumLiterals + (nNibbleLongOffset ? 2 : 1) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
if ((nOutOffset + nTokenSize) > nMaxOutDataSize)
return -1;
if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
return -1;
pOutData[nOutOffset++] = nNibbleLongOffset | (nNibbleLiteralsLen << 4) | nNibbleMatchLen;
nOutOffset = lzsa_write_literals_varlen(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
pOutData[nOutOffset++] = (nMatchOffset - 1) & 0xff;
if (nNibbleLongOffset) {
pOutData[nOutOffset++] = (nMatchOffset - 1) >> 8;
}
nOutOffset = lzsa_write_match_varlen(pOutData, nOutOffset, nEncodedMatchLen);
i += nMatchLen;
pCompressor->num_commands++;
}
else {
if (nNumLiterals == 0)
nInFirstLiteralOffset = i;
nNumLiterals++;
i++;
}
}
{
int nNibbleLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN) ? LITERALS_RUN_LEN : nNumLiterals;
int nTokenSize = 1 /* nibble */ + lzsa_get_literals_varlen_size(nNumLiterals) + nNumLiterals;
if ((nOutOffset + nTokenSize) > nMaxOutDataSize)
return -1;
pOutData[nOutOffset++] = (nNibbleLiteralsLen << 4) | 0x0f;
nOutOffset = lzsa_write_literals_varlen(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
pCompressor->num_commands++;
}
return nOutOffset;
}
/**
* Compress one block of data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
* @param nInDataSize number of input bytes to compress
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize);
if (nPreviousBlockSize) {
lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
}
lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
lzsa_optimize_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
lzsa_optimize_command_count(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
return lzsa_write_block(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
}
/**
* Get the number of compression commands issued in compressed data blocks
*
* @return number of commands
*/
int lzsa_compressor_get_command_count(lsza_compressor *pCompressor) {
return pCompressor->num_commands;
}

710
src/shrink_block_v1.c Normal file

@@ -0,0 +1,710 @@
/*
* shrink_block_v1.c - LZSA1 block compressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "lib.h"
#include "shrink_block_v1.h"
#include "format.h"
/**
* Get the number of extra bits required to represent a literals length
*
* @param nLength literals length
*
* @return number of extra bits required
*/
static inline int lzsa_get_literals_varlen_size_v1(const int nLength) {
if (nLength < LITERALS_RUN_LEN_V1) {
return 0;
}
else {
if (nLength < 256)
return 8;
else {
if (nLength < 512)
return 16;
else
return 24;
}
}
}
/**
* Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough
* room to write the bytes.
*
* @param pOutData pointer to output buffer
* @param nOutOffset current write index into output buffer
* @param nLength literals length
*/
static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
if (nLength >= LITERALS_RUN_LEN_V1) {
if (nLength < 256)
pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN_V1;
else {
if (nLength < 512) {
pOutData[nOutOffset++] = 250;
pOutData[nOutOffset++] = nLength - 256;
}
else {
pOutData[nOutOffset++] = 249;
pOutData[nOutOffset++] = nLength & 0xff;
pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
}
}
}
return nOutOffset;
}
/**
* Get the number of extra bits required to represent an encoded match length
*
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
*
* @return number of extra bits required
*/
static inline int lzsa_get_match_varlen_size_v1(const int nLength) {
if (nLength < MATCH_RUN_LEN_V1) {
return 0;
}
else {
if ((nLength + MIN_MATCH_SIZE_V1) < 256)
return 8;
else {
if ((nLength + MIN_MATCH_SIZE_V1) < 512)
return 16;
else
return 24;
}
}
}
/**
* Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough
* room to write the bytes.
*
* @param pOutData pointer to output buffer
* @param nOutOffset current write index into output buffer
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
*/
static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
if (nLength >= MATCH_RUN_LEN_V1) {
if ((nLength + MIN_MATCH_SIZE_V1) < 256)
pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN_V1;
else {
if ((nLength + MIN_MATCH_SIZE_V1) < 512) {
pOutData[nOutOffset++] = 239;
pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE_V1 - 256;
}
else {
pOutData[nOutOffset++] = 238;
pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE_V1) & 0xff;
pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE_V1) >> 8) & 0xff;
}
}
}
return nOutOffset;
}
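/*
 * Added worked example (not part of the original source; MIN_MATCH_SIZE_V1 == 3
 * and MATCH_RUN_LEN_V1 == 15 are assumed here for illustration): a 200-byte
 * match has an encoded length of 197. Since 197 >= 15 and 197 + 3 < 256, the
 * token's match length field holds 15 and lzsa_write_match_varlen_v1() emits a
 * single extra byte, 197 - 15 = 182. A 600-byte match (encoded length 597,
 * 597 + 3 >= 512) instead emits the escape byte 238 followed by the full
 * 16-bit match length, 600, in little-endian order. */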
/**
* Get offset encoding cost in bits
*
* @param nMatchOffset offset to get cost of
*
* @return cost in bits
*/
static inline int lzsa_get_offset_cost_v1(const unsigned int nMatchOffset) {
return (nMatchOffset <= 256) ? 8 : 16;
}
/**
* Attempt to pick optimal matches using a forward arrivals parser, so as to produce the smallest possible output that decompresses to the same input
*
* @param pCompressor compression context
* @param pBestMatch pointer to buffer that receives the selected optimal matches
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
* @param nReduce non-zero to break cost ties in favor of fewer tokens, zero not to
*/
static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int nReduce) {
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT);
const int nMinMatchSize = pCompressor->min_match_size;
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
const int nModeSwitchPenalty = nFavorRatio ? 0 : MODESWITCH_PENALTY;
const int nDisableScore = nReduce ? 0 : (2 * BLOCK_SIZE);
int i, j, n;
if ((nEndOffset - nStartOffset) > BLOCK_SIZE) return;
memset(arrival + (nStartOffset << ARRIVALS_PER_POSITION_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset + 1) << ARRIVALS_PER_POSITION_SHIFT));
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].from_slot = -1;
for (i = nStartOffset; i != nEndOffset; i++) {
lzsa_arrival* cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT];
int m;
for (j = 0; j < NARRIVALS_PER_POSITION_V1 && cur_arrival[j].from_slot; j++) {
int nPrevCost = cur_arrival[j].cost;
int nCodingChoiceCost = nPrevCost + 8 /* literal */;
int nScore = cur_arrival[j].score + 1;
int nNumLiterals = cur_arrival[j].num_literals + 1;
if (nNumLiterals == LITERALS_RUN_LEN_V1 || nNumLiterals == 256 || nNumLiterals == 512) {
nCodingChoiceCost += 8;
}
if (nNumLiterals == 1)
nCodingChoiceCost += nModeSwitchPenalty;
lzsa_arrival *pDestSlots = &arrival[(i + 1) << ARRIVALS_PER_POSITION_SHIFT];
for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &pDestSlots[n];
if (pDestArrival->from_slot == 0 ||
nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
memmove(&arrival[((i + 1) << ARRIVALS_PER_POSITION_SHIFT) + n + 1],
&arrival[((i + 1) << ARRIVALS_PER_POSITION_SHIFT) + n],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1));
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->from_slot = j + 1;
pDestArrival->match_len = 0;
pDestArrival->num_literals = nNumLiterals;
pDestArrival->score = nScore;
pDestArrival->rep_offset = cur_arrival[j].rep_offset;
break;
}
}
}
const lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V1);
int nNumArrivalsForThisPos = j;
for (m = 0; m < NMATCHES_PER_INDEX_V1 && match[m].length; m++) {
int nMatchLen = match[m].length;
int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset);
int nStartingMatchLen, k;
if ((i + nMatchLen) > nEndOffset)
nMatchLen = nEndOffset - i;
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
nStartingMatchLen = nMatchLen;
else
nStartingMatchLen = nMinMatchSize;
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
lzsa_arrival *pDestSlots = &arrival[(i + k) << ARRIVALS_PER_POSITION_SHIFT];
for (j = 0; j < nNumArrivalsForThisPos; j++) {
int nPrevCost = cur_arrival[j].cost;
int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
int exists = 0;
if (!cur_arrival[j].num_literals)
nCodingChoiceCost += nModeSwitchPenalty;
for (n = 0;
n < NARRIVALS_PER_POSITION_V1 && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost;
n++) {
if (lzsa_get_offset_cost_v1(pDestSlots[n].rep_offset) == nMatchOffsetCost) {
exists = 1;
break;
}
}
if (!exists) {
int nScore = cur_arrival[j].score + 5;
for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &pDestSlots[n];
if (pDestArrival->from_slot == 0 ||
nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
memmove(&pDestSlots[n + 1],
&pDestSlots[n],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1));
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->from_slot = j + 1;
pDestArrival->match_len = k;
pDestArrival->num_literals = 0;
pDestArrival->score = nScore;
pDestArrival->rep_offset = match[m].offset;
j = NARRIVALS_PER_POSITION_V1;
break;
}
}
}
}
}
}
}
lzsa_arrival *end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT) + 0];
while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0) {
if (end_arrival->from_pos >= nEndOffset) return;
pBestMatch[end_arrival->from_pos].length = end_arrival->match_len;
if (end_arrival->match_len)
pBestMatch[end_arrival->from_pos].offset = end_arrival->rep_offset;
else
pBestMatch[end_arrival->from_pos].offset = 0;
end_arrival = &arrival[(end_arrival->from_pos << ARRIVALS_PER_POSITION_SHIFT) + (end_arrival->from_slot - 1)];
}
}
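/*
 * Added note (not part of the original source): this is a forward "arrivals"
 * optimal parser. For every position it keeps up to NARRIVALS_PER_POSITION_V1
 * of the cheapest known ways to arrive there, sorted by cost in bits; each
 * arrival is extended by one literal and by every usable match length, and
 * cost ties are broken in favor of a lower score (fewer tokens) only when
 * nReduce is non-zero. The cheapest chain is then walked backwards from the
 * end of the block to fill in pBestMatch[]. */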
/**
* Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
* impacting the compression ratio
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param pBestMatch optimal matches to emit
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*
* @return non-zero if the number of tokens was reduced, 0 if it wasn't
*/
static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) {
int i;
int nNumLiterals = 0;
int nDidReduce = 0;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pBestMatch + i;
if (pMatch->length == 0 &&
(i + 1) < nEndOffset &&
pBestMatch[i + 1].length >= MIN_MATCH_SIZE_V1 &&
pBestMatch[i + 1].length < MAX_VARLEN &&
pBestMatch[i + 1].offset &&
i >= pBestMatch[i + 1].offset &&
(i + pBestMatch[i + 1].length + 1) <= nEndOffset &&
!memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
int nCurLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V1);
int nReducedLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V1);
if ((nReducedLenSize - nCurLenSize) <= 8) {
/* Merge */
pBestMatch[i].length = pBestMatch[i + 1].length + 1;
pBestMatch[i].offset = pBestMatch[i + 1].offset;
pBestMatch[i + 1].length = 0;
pBestMatch[i + 1].offset = 0;
nDidReduce = 1;
continue;
}
}
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
if (pMatch->length <= 9 /* Don't waste time considering large matches, they will always win over literals */ &&
(i + pMatch->length) < nEndOffset /* Don't consider the last token in the block, we can only reduce a match in between other tokens */) {
int nNextIndex = i + pMatch->length;
int nNextLiterals = 0;
while (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length < MIN_MATCH_SIZE_V1) {
nNextLiterals++;
nNextIndex++;
}
/* This command is a match, is followed by 'nNextLiterals' literals and then by another match, or the end of the input. Calculate this command's current cost (excluding 'nNumLiterals' bytes) */
if ((8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((pMatch->offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1) +
8 /* token */ + lzsa_get_literals_varlen_size_v1(nNextLiterals)) >=
(8 /* token */ + (pMatch->length << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + pMatch->length + nNextLiterals))) {
/* Reduce */
int nMatchLen = pMatch->length;
int j;
for (j = 0; j < nMatchLen; j++) {
pBestMatch[i + j].length = 0;
}
nDidReduce = 1;
continue;
}
}
if ((i + pMatch->length) <= nEndOffset && pMatch->offset > 0 && pMatch->length >= MIN_MATCH_SIZE_V1 &&
pBestMatch[i + pMatch->length].offset > 0 &&
pBestMatch[i + pMatch->length].length >= MIN_MATCH_SIZE_V1 &&
(pMatch->length + pBestMatch[i + pMatch->length].length) >= LEAVE_ALONE_MATCH_SIZE &&
(pMatch->length + pBestMatch[i + pMatch->length].length) <= MAX_VARLEN &&
(i + pMatch->length) > pMatch->offset &&
(i + pMatch->length) > pBestMatch[i + pMatch->length].offset &&
(i + pMatch->length + pBestMatch[i + pMatch->length].length) <= nEndOffset &&
!memcmp(pInWindow + i - pMatch->offset + pMatch->length,
pInWindow + i + pMatch->length - pBestMatch[i + pMatch->length].offset,
pBestMatch[i + pMatch->length].length)) {
int nCurPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1);
nCurPartialSize += 8 /* token */ + lzsa_get_literals_varlen_size_v1(0) + ((pBestMatch[i + pMatch->length].offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
int nReducedPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
if (nCurPartialSize >= nReducedPartialSize) {
int nMatchLen = pMatch->length;
/* Join */
pMatch->length += pBestMatch[i + nMatchLen].length;
pBestMatch[i + nMatchLen].offset = 0;
pBestMatch[i + nMatchLen].length = -1;
continue;
}
}
i += pMatch->length;
nNumLiterals = 0;
}
else {
nNumLiterals++;
i++;
}
}
return nDidReduce;
}
/**
* Get compressed data block size
*
* @param pCompressor compression context
* @param pBestMatch optimal matches to emit
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*
* @return size of compressed data that will be written to output buffer
*/
static int lzsa_get_compressed_size_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) {
int i;
int nNumLiterals = 0;
int nCompressedSize = 0;
for (i = nStartOffset; i < nEndOffset; ) {
const lzsa_match *pMatch = pBestMatch + i;
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
int nMatchOffset = pMatch->offset;
int nMatchLen = pMatch->length;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
nCompressedSize += nCommandSize;
nNumLiterals = 0;
i += nMatchLen;
}
else {
nNumLiterals++;
i++;
}
}
{
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
nCompressedSize += nCommandSize;
nNumLiterals = 0;
}
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
nCompressedSize += 8 * 4;
}
return nCompressedSize;
}
/**
* Emit block of compressed data
*
* @param pCompressor compression context
* @param pBestMatch optimal matches to emit
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
int i;
int nNumLiterals = 0;
int nInFirstLiteralOffset = 0;
int nOutOffset = 0;
for (i = nStartOffset; i < nEndOffset; ) {
const lzsa_match *pMatch = pBestMatch + i;
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
int nMatchOffset = pMatch->offset;
int nMatchLen = pMatch->length;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen;
int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
return -1;
pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen;
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1)
pCompressor->stats.min_literals = nNumLiterals;
if (nNumLiterals > pCompressor->stats.max_literals)
pCompressor->stats.max_literals = nNumLiterals;
pCompressor->stats.total_literals += nNumLiterals;
pCompressor->stats.literals_divisor++;
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
if (nTokenLongOffset) {
pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
}
nOutOffset = lzsa_write_match_varlen_v1(pOutData, nOutOffset, nEncodedMatchLen);
if (nMatchOffset < pCompressor->stats.min_offset || pCompressor->stats.min_offset == -1)
pCompressor->stats.min_offset = nMatchOffset;
if (nMatchOffset > pCompressor->stats.max_offset)
pCompressor->stats.max_offset = nMatchOffset;
pCompressor->stats.total_offsets += nMatchOffset;
if (nMatchLen < pCompressor->stats.min_match_len || pCompressor->stats.min_match_len == -1)
pCompressor->stats.min_match_len = nMatchLen;
if (nMatchLen > pCompressor->stats.max_match_len)
pCompressor->stats.max_match_len = nMatchLen;
pCompressor->stats.total_match_lens += nMatchLen;
pCompressor->stats.match_divisor++;
if (nMatchOffset == 1) {
if (nMatchLen < pCompressor->stats.min_rle1_len || pCompressor->stats.min_rle1_len == -1)
pCompressor->stats.min_rle1_len = nMatchLen;
if (nMatchLen > pCompressor->stats.max_rle1_len)
pCompressor->stats.max_rle1_len = nMatchLen;
pCompressor->stats.total_rle1_lens += nMatchLen;
pCompressor->stats.rle1_divisor++;
}
else if (nMatchOffset == 2) {
if (nMatchLen < pCompressor->stats.min_rle2_len || pCompressor->stats.min_rle2_len == -1)
pCompressor->stats.min_rle2_len = nMatchLen;
if (nMatchLen > pCompressor->stats.max_rle2_len)
pCompressor->stats.max_rle2_len = nMatchLen;
pCompressor->stats.total_rle2_lens += nMatchLen;
pCompressor->stats.rle2_divisor++;
}
i += nMatchLen;
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
int nCurSafeDist = (i - nStartOffset) - nOutOffset;
if (nCurSafeDist >= 0 && pCompressor->safe_dist < nCurSafeDist)
pCompressor->safe_dist = nCurSafeDist;
}
pCompressor->num_commands++;
}
else {
if (nNumLiterals == 0)
nInFirstLiteralOffset = i;
nNumLiterals++;
i++;
}
}
{
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
else
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1)
pCompressor->stats.min_literals = nNumLiterals;
if (nNumLiterals > pCompressor->stats.max_literals)
pCompressor->stats.max_literals = nNumLiterals;
pCompressor->stats.total_literals += nNumLiterals;
pCompressor->stats.literals_divisor++;
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
int nCurSafeDist = (i - nStartOffset) - nOutOffset;
if (nCurSafeDist >= 0 && pCompressor->safe_dist < nCurSafeDist)
pCompressor->safe_dist = nCurSafeDist;
}
pCompressor->num_commands++;
}
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
/* Emit EOD marker for raw block */
if ((nOutOffset + 4) > nMaxOutDataSize)
return -1;
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = 238;
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = 0;
}
return nOutOffset;
}
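/* Editorial illustration, not part of this changeset: the end-of-data marker
   emitted above for raw LZSA1 blocks is the byte sequence 0x00, 0xEE, 0x00, 0x00
   (token 0, then 238, 0, 0). The helper below merely restates those bytes
   (memcmp() comes from <string.h>, already needed for memcpy() above); an
   actual depacker detects end-of-data while parsing tokens rather than by
   scanning for the marker. */
static int is_lzsa1_raw_eod_marker(const unsigned char *p) {
   static const unsigned char marker[4] = { 0x00, 0xEE, 0x00, 0x00 };
   return memcmp(p, marker, 4) == 0;
}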
/**
* Emit raw block of uncompressible data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
int nNumLiterals = nEndOffset - nStartOffset;
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nOutOffset = 0;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + 4;
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
pCompressor->num_commands = 0;
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nStartOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
pCompressor->num_commands++;
/* Emit EOD marker for raw block */
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = 238;
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = 0;
return nOutOffset;
}
/**
* Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
* @param nInDataSize number of input bytes to compress
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
int nResult, nBaseCompressedSize;
/* Compress optimally without breaking ties in favor of less tokens */
memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
lzsa_optimize_forward_v1(pCompressor, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */);
int nDidReduce;
int nPasses = 0;
do {
nDidReduce = lzsa_optimize_command_count_v1(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nPasses++;
} while (nDidReduce && nPasses < 20);
nBaseCompressedSize = lzsa_get_compressed_size_v1(pCompressor, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
lzsa_match *pBestMatch = pCompressor->best_match - nPreviousBlockSize;
if (nBaseCompressedSize > 0 && nInDataSize < 65536) {
int nReducedCompressedSize;
/* Compress optimally and do break ties in favor of less tokens */
memset(pCompressor->improved_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
lzsa_optimize_forward_v1(pCompressor, pCompressor->improved_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */);
nPasses = 0;
do {
nDidReduce = lzsa_optimize_command_count_v1(pCompressor, pInWindow, pCompressor->improved_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nPasses++;
} while (nDidReduce && nPasses < 20);
nReducedCompressedSize = lzsa_get_compressed_size_v1(pCompressor, pCompressor->improved_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
if (nReducedCompressedSize > 0 && nReducedCompressedSize <= nBaseCompressedSize) {
/* Pick the parse with the reduced number of tokens as it didn't negatively affect the size */
pBestMatch = pCompressor->improved_match - nPreviousBlockSize;
}
}
nResult = lzsa_write_block_v1(pCompressor, pBestMatch, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
if (nResult < 0 && pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
nResult = lzsa_write_raw_uncompressed_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
}
return nResult;
}

57
src/shrink.h → src/shrink_block_v1.h Executable file → Normal file

@@ -1,5 +1,5 @@
/*
* shrink.h - block compressor definitions
* shrink_block_v1.h - LZSA1 block compressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
@@ -20,40 +20,24 @@
* 3. This notice may not be removed or altered from any source distribution.
*/
#ifndef _SHRINK_H
#define _SHRINK_H
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_BLOCK_V1_H
#define _SHRINK_BLOCK_V1_H
/* Forward declarations */
typedef struct _lzsa_match lzsa_match;
/** Compression context */
typedef struct {
unsigned int *intervals;
unsigned int *pos_data;
unsigned int *open_intervals;
lzsa_match *match;
int num_commands;
} lsza_compressor;
typedef struct _lzsa_compressor lzsa_compressor;
/**
* Initialize compression context
*
* @param pCompressor compression context to initialize
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
*
* @return 0 for success, non-zero for failure
*/
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize);
/**
* Clean up compression context and free up any associated resources
*
* @param pCompressor compression context to clean up
*/
void lzsa_compressor_destroy(lsza_compressor *pCompressor);
/**
* Compress one block of data
* Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
@@ -64,13 +48,6 @@ void lzsa_compressor_destroy(lsza_compressor *pCompressor);
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
/**
* Get the number of compression commands issued in compressed data blocks
*
* @return number of commands
*/
int lzsa_compressor_get_command_count(lsza_compressor *pCompressor);
#endif /* _SHRINK_H */
#endif /* _SHRINK_BLOCK_V1_H */

1367
src/shrink_block_v2.c Normal file

File diff suppressed because it is too large

53
src/shrink_block_v2.h Normal file

@@ -0,0 +1,53 @@
/*
* shrink_block_v2.h - LZSA2 block compressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_BLOCK_V2_H
#define _SHRINK_BLOCK_V2_H
/* Forward declarations */
typedef struct _lzsa_compressor lzsa_compressor;
/**
* Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
* @param nInDataSize number of input bytes to compress
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
#endif /* _SHRINK_BLOCK_V2_H */

254
src/shrink_context.c Normal file

@@ -0,0 +1,254 @@
/*
* shrink_context.c - compression context implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "shrink_context.h"
#include "shrink_block_v1.h"
#include "shrink_block_v2.h"
#include "format.h"
#include "matchfinder.h"
#include "lib.h"
/**
* Initialize compression context
*
* @param pCompressor compression context to initialize
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
* @param nFormatVersion version of format to use (1-2)
* @param nFlags compression flags
*
* @return 0 for success, non-zero for failure
*/
int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags) {
int nResult;
int nMinMatchSizeForFormat = (nFormatVersion == 1) ? MIN_MATCH_SIZE_V1 : MIN_MATCH_SIZE_V2;
int nMaxMinMatchForFormat = (nFormatVersion == 1) ? 5 : 3;
nResult = divsufsort_init(&pCompressor->divsufsort_context);
pCompressor->intervals = NULL;
pCompressor->pos_data = NULL;
pCompressor->open_intervals = NULL;
pCompressor->match = NULL;
pCompressor->best_match = NULL;
pCompressor->improved_match = NULL;
pCompressor->arrival = NULL;
pCompressor->rep_handled_mask = NULL;
pCompressor->first_offset_for_byte = NULL;
pCompressor->next_offset_for_pos = NULL;
pCompressor->min_match_size = nMinMatchSize;
if (pCompressor->min_match_size < nMinMatchSizeForFormat)
pCompressor->min_match_size = nMinMatchSizeForFormat;
else if (pCompressor->min_match_size > nMaxMinMatchForFormat)
pCompressor->min_match_size = nMaxMinMatchForFormat;
pCompressor->format_version = nFormatVersion;
pCompressor->flags = nFlags;
pCompressor->safe_dist = 0;
pCompressor->num_commands = 0;
memset(&pCompressor->stats, 0, sizeof(pCompressor->stats));
pCompressor->stats.min_literals = -1;
pCompressor->stats.min_match_len = -1;
pCompressor->stats.min_offset = -1;
pCompressor->stats.min_rle1_len = -1;
pCompressor->stats.min_rle2_len = -1;
if (!nResult) {
pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
if (pCompressor->intervals) {
pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
if (pCompressor->pos_data) {
pCompressor->open_intervals = (unsigned int *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned int));
if (pCompressor->open_intervals) {
pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << ARRIVALS_PER_POSITION_SHIFT) * sizeof(lzsa_arrival));
if (pCompressor->arrival) {
pCompressor->best_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match));
if (pCompressor->best_match) {
pCompressor->improved_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match));
if (pCompressor->improved_match) {
if (pCompressor->format_version == 2)
pCompressor->match = (lzsa_match *)malloc(BLOCK_SIZE * NMATCHES_PER_INDEX_V2 * sizeof(lzsa_match));
else
pCompressor->match = (lzsa_match *)malloc(BLOCK_SIZE * NMATCHES_PER_INDEX_V1 * sizeof(lzsa_match));
if (pCompressor->match) {
if (pCompressor->format_version == 2) {
pCompressor->rep_handled_mask = (char*)malloc(NARRIVALS_PER_POSITION_V2_BIG * ((LCP_MAX + 1) / 8) * sizeof(char));
if (pCompressor->rep_handled_mask) {
pCompressor->first_offset_for_byte = (int*)malloc(65536 * sizeof(int));
if (pCompressor->first_offset_for_byte) {
pCompressor->next_offset_for_pos = (int*)malloc(BLOCK_SIZE * sizeof(int));
if (pCompressor->next_offset_for_pos) {
return 0;
}
}
}
}
else {
return 0;
}
}
}
}
}
}
}
}
}
lzsa_compressor_destroy(pCompressor);
return 100;
}
/**
* Clean up compression context and free up any associated resources
*
* @param pCompressor compression context to clean up
*/
void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
divsufsort_destroy(&pCompressor->divsufsort_context);
if (pCompressor->next_offset_for_pos) {
free(pCompressor->next_offset_for_pos);
pCompressor->next_offset_for_pos = NULL;
}
if (pCompressor->first_offset_for_byte) {
free(pCompressor->first_offset_for_byte);
pCompressor->first_offset_for_byte = NULL;
}
if (pCompressor->rep_handled_mask) {
free(pCompressor->rep_handled_mask);
pCompressor->rep_handled_mask = NULL;
}
if (pCompressor->match) {
free(pCompressor->match);
pCompressor->match = NULL;
}
if (pCompressor->improved_match) {
free(pCompressor->improved_match);
pCompressor->improved_match = NULL;
}
if (pCompressor->arrival) {
free(pCompressor->arrival);
pCompressor->arrival = NULL;
}
if (pCompressor->best_match) {
free(pCompressor->best_match);
pCompressor->best_match = NULL;
}
if (pCompressor->open_intervals) {
free(pCompressor->open_intervals);
pCompressor->open_intervals = NULL;
}
if (pCompressor->pos_data) {
free(pCompressor->pos_data);
pCompressor->pos_data = NULL;
}
if (pCompressor->intervals) {
free(pCompressor->intervals);
pCompressor->intervals = NULL;
}
}
/**
* Compress one block of data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
* @param nInDataSize number of input bytes to compress
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
int nCompressedSize;
if (pCompressor->flags & LZSA_FLAG_RAW_BACKWARD) {
lzsa_reverse_buffer(pInWindow + nPreviousBlockSize, nInDataSize);
}
if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize))
nCompressedSize = -1;
else {
if (nPreviousBlockSize) {
lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
}
lzsa_find_all_matches(pCompressor, (pCompressor->format_version == 2) ? NMATCHES_PER_INDEX_V2 : NMATCHES_PER_INDEX_V1, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
if (pCompressor->format_version == 1) {
nCompressedSize = lzsa_optimize_and_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
if (nCompressedSize != -1 && (pCompressor->flags & LZSA_FLAG_RAW_BACKWARD)) {
lzsa_reverse_buffer(pOutData, nCompressedSize);
}
}
else if (pCompressor->format_version == 2) {
nCompressedSize = lzsa_optimize_and_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
if (nCompressedSize != -1 && (pCompressor->flags & LZSA_FLAG_RAW_BACKWARD)) {
lzsa_reverse_buffer(pOutData, nCompressedSize);
}
}
else {
nCompressedSize = -1;
}
}
if (pCompressor->flags & LZSA_FLAG_RAW_BACKWARD) {
lzsa_reverse_buffer(pInWindow + nPreviousBlockSize, nInDataSize);
}
return nCompressedSize;
}
/**
* Get the number of compression commands issued in compressed data blocks
*
* @return number of commands
*/
int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor) {
return pCompressor->num_commands;
}

182
src/shrink_context.h Normal file

@@ -0,0 +1,182 @@
/*
* shrink_context.h - compression context definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_CONTEXT_H
#define _SHRINK_CONTEXT_H
#include "divsufsort.h"
#ifdef __cplusplus
extern "C" {
#endif
#define LCP_BITS 14
#define TAG_BITS 4
#define LCP_MAX ((1U<<(LCP_BITS - TAG_BITS)) - 1)
#define LCP_AND_TAG_MAX (1U<<(LCP_BITS - 1))
#define LCP_SHIFT (31-LCP_BITS)
#define LCP_MASK (((1U<<LCP_BITS) - 1) << LCP_SHIFT)
#define POS_MASK ((1U<<LCP_SHIFT) - 1)
#define VISITED_FLAG 0x80000000
#define EXCL_VISITED_MASK 0x7fffffff
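/* Presumed layout of the packed entries, inferred from the masks above and not
   stated explicitly in the source: bits 0-16 hold a position (POS_MASK),
   bits 17-30 hold the 14 bits of LCP-and-tag information (LCP_MASK, i.e. a
   value shifted left by LCP_SHIFT), and bit 31 is the VISITED_FLAG. */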
#define NARRIVALS_PER_POSITION_V1 8
#define NARRIVALS_PER_POSITION_V2_SMALL 9
#define NARRIVALS_PER_POSITION_V2_BIG 32
#define ARRIVALS_PER_POSITION_SHIFT 5
#define NMATCHES_PER_INDEX_V1 8
#define MATCHES_PER_INDEX_SHIFT_V1 3
#define NMATCHES_PER_INDEX_V2 64
#define MATCHES_PER_INDEX_SHIFT_V2 6
#define LEAVE_ALONE_MATCH_SIZE 300
#define LEAVE_ALONE_MATCH_SIZE_SMALL 1000
#define MODESWITCH_PENALTY 3
/** One match */
typedef struct _lzsa_match {
unsigned short length;
unsigned short offset;
} lzsa_match;
/** Forward arrival slot */
typedef struct {
int cost;
unsigned short rep_offset;
short from_slot;
int from_pos;
unsigned short rep_len;
unsigned short match_len;
int rep_pos;
int num_literals;
int score;
} lzsa_arrival;
/** Compression statistics */
typedef struct _lzsa_stats {
int min_literals;
int max_literals;
int total_literals;
int min_offset;
int max_offset;
int num_rep_offsets;
int total_offsets;
int min_match_len;
int max_match_len;
int total_match_lens;
int min_rle1_len;
int max_rle1_len;
int total_rle1_lens;
int min_rle2_len;
int max_rle2_len;
int total_rle2_lens;
int literals_divisor;
int match_divisor;
int rle1_divisor;
int rle2_divisor;
} lzsa_stats;
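/* Editorial note, not from the source: as used by the LZSA1 block writer earlier
   in this diff, each *_divisor field counts how many commands contributed to the
   matching total_* sum (literals_divisor is incremented once per literal run,
   match_divisor once per match), so averages can be derived as
   total_literals / literals_divisor and so on, guarding against a zero divisor. */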
/** Compression context */
typedef struct _lzsa_compressor {
divsufsort_ctx_t divsufsort_context;
unsigned int *intervals;
unsigned int *pos_data;
unsigned int *open_intervals;
lzsa_match *match;
lzsa_match *best_match;
lzsa_match *improved_match;
lzsa_arrival *arrival;
char *rep_handled_mask;
int *first_offset_for_byte;
int *next_offset_for_pos;
int min_match_size;
int format_version;
int flags;
int safe_dist;
int num_commands;
lzsa_stats stats;
} lzsa_compressor;
/**
* Initialize compression context
*
* @param pCompressor compression context to initialize
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
* @param nFormatVersion version of format to use (1-2)
* @param nFlags compression flags
*
* @return 0 for success, non-zero for failure
*/
int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags);
/**
* Clean up compression context and free up any associated resources
*
* @param pCompressor compression context to clean up
*/
void lzsa_compressor_destroy(lzsa_compressor *pCompressor);
/**
* Compress one block of data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
* @param nInDataSize number of input bytes to compress
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
/**
* Get the number of compression commands issued in compressed data blocks
*
* @return number of commands
*/
int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor);
#ifdef __cplusplus
}
#endif
#endif /* _SHRINK_CONTEXT_H */
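The following sketch, added for illustration and not part of the changeset, shows the lifecycle the API above implies and that shrink_inmem.c and shrink_streaming.c follow: initialize a context sized for two blocks, compress a single block with no history, then destroy the context. It assumes BLOCK_SIZE comes from the same headers those files include (format.h), and it elides detailed error reporting.

#include "shrink_context.h"
#include "format.h"   /* assumed, as in shrink_inmem.c below, to provide BLOCK_SIZE */

/* Illustrative: compress a single block (nInSize <= BLOCK_SIZE) with no
   previous history. Returns the compressed size, or -1 if the data is
   uncompressible or the context could not be initialized. */
int compress_one_block_example(unsigned char *pIn, int nInSize,
                               unsigned char *pOut, int nMaxOutSize) {
   lzsa_compressor compressor;
   int nOutSize;

   /* Window holds one previous block plus the block being compressed. */
   if (lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, 0 /* min match, clamped per format */,
                            2 /* format version: LZSA2 */, 0 /* flags */) != 0)
      return -1;

   nOutSize = lzsa_compressor_shrink_block(&compressor, pIn, 0 /* no previous block */,
                                           nInSize, pOut, nMaxOutSize);

   lzsa_compressor_destroy(&compressor);
   return nOutSize;
}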

185
src/shrink_inmem.c Normal file

@@ -0,0 +1,185 @@
/*
* shrink_inmem.c - in-memory compression implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "shrink_inmem.h"
#include "shrink_context.h"
#include "frame.h"
#include "format.h"
#include "lib.h"
/**
* Get maximum compressed size of input(source) data
*
* @param nInputSize input(source) size in bytes
*
* @return maximum compressed size
*/
size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize) {
return lzsa_get_header_size() + ((nInputSize + (BLOCK_SIZE - 1)) >> 16) * lzsa_get_frame_size() + nInputSize + lzsa_get_frame_size() /* footer */;
}
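/* Editorial note, not from the source: the worst case allowed for above is the
   stream header, one block frame per BLOCK_SIZE-sized chunk of input (the
   ">> 16" presumes BLOCK_SIZE is 65536), the input itself stored uncompressed,
   and one more frame for the footer. */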
/**
* Compress memory
*
* @param pInputData pointer to input(source) data to compress
* @param pOutBuffer buffer for compressed data
* @param nInputSize input(source) size in bytes
* @param nMaxOutBufferSize maximum capacity of compression buffer
* @param nFlags compression flags (LZSA_FLAG_xxx)
* @param nMinMatchSize minimum match size
* @param nFormatVersion version of format to use (1-2)
*
* @return actual compressed size, or -1 for error
*/
size_t lzsa_compress_inmem(unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion) {
lzsa_compressor compressor;
size_t nOriginalSize = 0;
size_t nCompressedSize = 0L;
int nResult;
int nError = 0;
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
if (nResult != 0) {
return -1;
}
if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
int nHeaderSize = lzsa_encode_header(pOutBuffer, (int)nMaxOutBufferSize, nFormatVersion);
if (nHeaderSize < 0)
nError = LZSA_ERROR_COMPRESSION;
else {
nCompressedSize += nHeaderSize;
}
}
int nPreviousBlockSize = 0;
int nNumBlocks = 0;
while (nOriginalSize < nInputSize && !nError) {
int nInDataSize;
nInDataSize = (int)(nInputSize - nOriginalSize);
if (nInDataSize > BLOCK_SIZE)
nInDataSize = BLOCK_SIZE;
if (nInDataSize > 0) {
if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) {
nError = LZSA_ERROR_RAW_TOOLARGE;
break;
}
int nOutDataSize;
int nOutDataEnd = (int)(nMaxOutBufferSize - (lzsa_get_frame_size() + nCompressedSize + lzsa_get_frame_size() /* footer */));
int nFrameSize = lzsa_get_frame_size();
if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
nFrameSize = 0;
nOutDataEnd = (int)(nMaxOutBufferSize - nCompressedSize);
}
if (nOutDataEnd > BLOCK_SIZE)
nOutDataEnd = BLOCK_SIZE;
nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInputData + nOriginalSize - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutBuffer + nFrameSize + nCompressedSize, nOutDataEnd);
if (nOutDataSize >= 0) {
/* Write compressed block */
if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
int nBlockheaderSize = lzsa_encode_compressed_block_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize), nOutDataSize);
if (nBlockheaderSize < 0)
nError = LZSA_ERROR_COMPRESSION;
else {
nCompressedSize += nBlockheaderSize;
}
}
if (!nError) {
nOriginalSize += nInDataSize;
nCompressedSize += nOutDataSize;
}
}
else {
/* Write uncompressible, literal block */
if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
nError = LZSA_ERROR_RAW_UNCOMPRESSED;
break;
}
int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize), nInDataSize);
if (nBlockheaderSize < 0)
nError = LZSA_ERROR_COMPRESSION;
else {
if ((size_t)nInDataSize > (nMaxOutBufferSize - (nCompressedSize + nBlockheaderSize)))
nError = LZSA_ERROR_DST;
else {
memcpy(pOutBuffer + nBlockheaderSize + nCompressedSize, pInputData + nOriginalSize, nInDataSize);
nOriginalSize += nInDataSize;
nCompressedSize += nBlockheaderSize + nInDataSize;
}
}
}
nPreviousBlockSize = nInDataSize;
nNumBlocks++;
}
}
if (!nError) {
int nFooterSize;
if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
nFooterSize = 0;
}
else {
nFooterSize = lzsa_encode_footer_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize));
if (nFooterSize < 0)
nError = LZSA_ERROR_COMPRESSION;
}
nCompressedSize += nFooterSize;
}
lzsa_compressor_destroy(&compressor);
if (nError) {
return -1;
}
else {
return nCompressedSize;
}
}

71
src/shrink_inmem.h Normal file

@@ -0,0 +1,71 @@
/*
* shrink_inmem.h - in-memory compression definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_INMEM_H
#define _SHRINK_INMEM_H
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Get maximum compressed size of input(source) data
*
* @param nInputSize input(source) size in bytes
*
* @return maximum compressed size
*/
size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize);
/**
* Compress memory
*
* @param pInputData pointer to input(source) data to compress
* @param pOutBuffer buffer for compressed data
* @param nInputSize input(source) size in bytes
* @param nMaxOutBufferSize maximum capacity of compression buffer
* @param nFlags compression flags (LZSA_FLAG_xxx)
* @param nMinMatchSize minimum match size
* @param nFormatVersion version of format to use (1-2)
*
* @return actual compressed size, or -1 for error
*/
size_t lzsa_compress_inmem(unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion);
#ifdef __cplusplus
}
#endif
#endif /* _SHRINK_INMEM_H */
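A usage sketch for the in-memory API above, added for illustration rather than taken from the changeset: size the destination with lzsa_get_max_compressed_size_inmem(), compress in a single call, and treat a return value of (size_t)-1 as failure per the documentation above. The helper name and the choice of LZSA2 with default options are assumptions.

#include <stdlib.h>
#include "shrink_inmem.h"

/* Illustrative: compress a buffer with the in-memory API declared above.
   Returns a malloc()ed buffer the caller must free(), or NULL on error. */
unsigned char *compress_buffer_example(unsigned char *pInput, size_t nInputSize, size_t *pCompressedSize) {
   size_t nMaxSize = lzsa_get_max_compressed_size_inmem(nInputSize);
   unsigned char *pOutput = (unsigned char *)malloc(nMaxSize);
   if (!pOutput) return NULL;

   size_t nSize = lzsa_compress_inmem(pInput, pOutput, nInputSize, nMaxSize,
                                      0 /* flags */, 0 /* min match (clamped) */, 2 /* LZSA2 */);
   if (nSize == (size_t)-1) {   /* documented error return */
      free(pOutput);
      return NULL;
   }
   *pCompressedSize = nSize;
   return pOutput;
}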

320
src/shrink_streaming.c Normal file

@@ -0,0 +1,320 @@
/*
* shrink_streaming.c - streaming compression implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "shrink_streaming.h"
#include "format.h"
#include "frame.h"
#include "lib.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <stdio.h>
#endif
/**
* Delete file
*
* @param pszInFilename name of file to delete
*/
static void lzsa_delete_file(const char *pszInFilename) {
#ifdef _WIN32
DeleteFileA(pszInFilename);
#else
remove(pszInFilename);
#endif
}
/*-------------- File API -------------- */
/**
* Compress file
*
* @param pszInFilename name of input(source) file to compress
* @param pszOutFilename name of output(compressed) file to generate
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param nFlags compression flags (LZSA_FLAG_xxx)
* @param nMinMatchSize minimum match size
* @param nFormatVersion version of format to use (1-2)
* @param progress progress function, called after compressing each block, or NULL for none
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
* @param pStats pointer to compression stats that are filled if this function is successful, or NULL
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats) {
lzsa_stream_t inStream, outStream;
void *pDictionaryData = NULL;
int nDictionaryDataSize = 0;
lzsa_status_t nStatus;
if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0) {
return LZSA_ERROR_SRC;
}
if (lzsa_filestream_open(&outStream, pszOutFilename, "wb") < 0) {
inStream.close(&inStream);
return LZSA_ERROR_DST;
}
nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize);
if (nStatus) {
outStream.close(&outStream);
inStream.close(&inStream);
lzsa_delete_file(pszOutFilename);
return nStatus;
}
nStatus = lzsa_compress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nMinMatchSize, nFormatVersion, progress, pOriginalSize, pCompressedSize, pCommandCount, pSafeDist, pStats);
lzsa_dictionary_free(&pDictionaryData);
outStream.close(&outStream);
inStream.close(&inStream);
if (nStatus) {
lzsa_delete_file(pszOutFilename);
}
return nStatus;
}
/*-------------- Streaming API -------------- */
/**
* Compress stream
*
* @param pInStream input(source) stream to compress
* @param pOutStream output(compressed) stream to write to
* @param pDictionaryData dictionary contents, or NULL for none
* @param nDictionaryDataSize size of dictionary contents, or 0
* @param nFlags compression flags (LZSA_FLAG_xxx)
* @param nMinMatchSize minimum match size
* @param nFormatVersion version of format to use (1-2)
* @param progress progress function, called after compressing each block, or NULL for none
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
* @param pStats pointer to compression stats that are filled if this function is successful, or NULL
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats) {
unsigned char *pInData, *pOutData;
lzsa_compressor compressor;
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
int nResult;
unsigned char cFrameData[16];
int nError = 0;
int nRawPadding = (nFlags & LZSA_FLAG_RAW_BLOCK) ? 8 : 0;
pInData = (unsigned char*)malloc(BLOCK_SIZE * 2);
if (!pInData) {
return LZSA_ERROR_MEMORY;
}
memset(pInData, 0, BLOCK_SIZE * 2);
pOutData = (unsigned char*)malloc(BLOCK_SIZE);
if (!pOutData) {
free(pInData);
pInData = NULL;
return LZSA_ERROR_MEMORY;
}
memset(pOutData, 0, BLOCK_SIZE);
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
if (nResult != 0) {
free(pOutData);
pOutData = NULL;
free(pInData);
pInData = NULL;
return LZSA_ERROR_MEMORY;
}
if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
int nHeaderSize = lzsa_encode_header(cFrameData, 16, nFormatVersion);
if (nHeaderSize < 0)
nError = LZSA_ERROR_COMPRESSION;
else {
if (pOutStream->write(pOutStream, cFrameData, nHeaderSize) != nHeaderSize)
nError = LZSA_ERROR_DST;
nCompressedSize += (long long)nHeaderSize;
}
}
int nPreviousBlockSize = 0;
int nNumBlocks = 0;
while (!pInStream->eof(pInStream) && !nError) {
int nInDataSize;
if (nPreviousBlockSize) {
memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pInData + BLOCK_SIZE, nPreviousBlockSize);
}
else if (nDictionaryDataSize && pDictionaryData) {
nPreviousBlockSize = nDictionaryDataSize;
memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pDictionaryData, nPreviousBlockSize);
}
nInDataSize = (int)pInStream->read(pInStream, pInData + BLOCK_SIZE, BLOCK_SIZE);
if (nInDataSize > 0) {
if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) {
nError = LZSA_ERROR_RAW_TOOLARGE;
break;
}
nDictionaryDataSize = 0;
int nOutDataSize;
nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInData + BLOCK_SIZE - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutData, ((nInDataSize + nRawPadding) >= BLOCK_SIZE) ? BLOCK_SIZE : (nInDataSize + nRawPadding));
if (nOutDataSize >= 0) {
/* Write compressed block */
if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
int nBlockheaderSize = lzsa_encode_compressed_block_frame(cFrameData, 16, nOutDataSize);
if (nBlockheaderSize < 0)
nError = LZSA_ERROR_COMPRESSION;
else {
nCompressedSize += (long long)nBlockheaderSize;
if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) {
nError = LZSA_ERROR_DST;
}
}
}
if (!nError) {
if (pOutStream->write(pOutStream, pOutData, (size_t)nOutDataSize) != (size_t)nOutDataSize) {
nError = LZSA_ERROR_DST;
}
else {
nOriginalSize += (long long)nInDataSize;
nCompressedSize += (long long)nOutDataSize;
}
}
}
else {
/* Write uncompressible, literal block */
if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
nError = LZSA_ERROR_RAW_UNCOMPRESSED;
break;
}
int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(cFrameData, 16, nInDataSize);
if (nBlockheaderSize < 0)
nError = LZSA_ERROR_COMPRESSION;
else {
if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) {
nError = LZSA_ERROR_DST;
}
else {
if (pOutStream->write(pOutStream, pInData + BLOCK_SIZE, (size_t)nInDataSize) != (size_t)nInDataSize) {
nError = LZSA_ERROR_DST;
}
else {
nOriginalSize += (long long)nInDataSize;
nCompressedSize += (long long)nBlockheaderSize + (long long)nInDataSize;
}
}
}
}
nPreviousBlockSize = nInDataSize;
nNumBlocks++;
}
if (!nError && !pInStream->eof(pInStream)) {
if (progress)
progress(nOriginalSize, nCompressedSize);
}
}
if (!nError) {
int nFooterSize;
if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
nFooterSize = 0;
}
else {
nFooterSize = lzsa_encode_footer_frame(cFrameData, 16);
if (nFooterSize < 0)
nError = LZSA_ERROR_COMPRESSION;
}
if (pOutStream->write(pOutStream, cFrameData, nFooterSize) != nFooterSize)
nError = LZSA_ERROR_DST;
nCompressedSize += (long long)nFooterSize;
}
if (progress)
progress(nOriginalSize, nCompressedSize);
int nCommandCount = lzsa_compressor_get_command_count(&compressor);
int nSafeDist = compressor.safe_dist;
if (pStats)
*pStats = compressor.stats;
lzsa_compressor_destroy(&compressor);
free(pOutData);
pOutData = NULL;
free(pInData);
pInData = NULL;
if (nError) {
return nError;
}
else {
if (pOriginalSize)
*pOriginalSize = nOriginalSize;
if (pCompressedSize)
*pCompressedSize = nCompressedSize;
if (pCommandCount)
*pCommandCount = nCommandCount;
if (pSafeDist)
*pSafeDist = nSafeDist;
return LZSA_OK;
}
}

99
src/shrink_streaming.h Normal file

@@ -0,0 +1,99 @@
/*
* shrink_streaming.h - streaming compression definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_STREAMING_H
#define _SHRINK_STREAMING_H
#include "stream.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declaration */
typedef enum _lzsa_status_t lzsa_status_t;
typedef struct _lzsa_stats lzsa_stats;
/*-------------- File API -------------- */
/**
* Compress file
*
* @param pszInFilename name of input(source) file to compress
* @param pszOutFilename name of output(compressed) file to generate
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param nFlags compression flags (LZSA_FLAG_xxx)
* @param nMinMatchSize minimum match size
* @param nFormatVersion version of format to use (1-2)
* @param progress progress function, called after compressing each block, or NULL for none
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
* @param pStats pointer to compression stats that are filled if this function is successful, or NULL
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats);
/*-------------- Streaming API -------------- */
/**
* Compress stream
*
* @param pInStream input(source) stream to compress
* @param pOutStream output(compressed) stream to write to
* @param pDictionaryData dictionary contents, or NULL for none
* @param nDictionaryDataSize size of dictionary contents, or 0
* @param nFlags compression flags (LZSA_FLAG_xxx)
* @param nMinMatchSize minimum match size
* @param nFormatVersion version of format to use (1-2)
* @param progress progress function, called after compressing each block, or NULL for none
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
* @param pStats pointer to compression stats that are filled if this function is successful, or NULL
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats);
#ifdef __cplusplus
}
#endif
#endif /* _SHRINK_STREAMING_H */
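An illustrative call to the file API above, not part of the changeset. The file names are placeholders; every optional out-parameter is passed as NULL, which the lzsa_compress_stream() implementation above tolerates by checking each pointer before writing to it. LZSA_OK and the full lzsa_status_t enum are assumed to come from lib.h, as included by shrink_streaming.c.

#include <stddef.h>
#include "shrink_streaming.h"
#include "lib.h"   /* assumed to provide LZSA_OK and the full lzsa_status_t enum */

/* Illustrative: compress "input.bin" to "output.lzsa2" with LZSA2 framing,
   no dictionary, default options, and no progress or stats reporting. */
int compress_file_example(void) {
   lzsa_status_t nStatus = lzsa_compress_file("input.bin", "output.lzsa2", NULL /* no dictionary */,
                                              0 /* flags */, 0 /* min match (clamped) */, 2 /* LZSA2 */,
                                              NULL, NULL, NULL, NULL, NULL, NULL);
   return (nStatus == LZSA_OK) ? 0 : 1;
}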

111
src/stream.c Normal file

@@ -0,0 +1,111 @@
/*
* stream.c - streaming I/O implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "stream.h"
/**
* Close file stream
*
* @param stream stream
*/
static void lzsa_filestream_close(lzsa_stream_t *stream) {
if (stream->obj) {
fclose((FILE*)stream->obj);
stream->obj = NULL;
stream->read = NULL;
stream->write = NULL;
stream->eof = NULL;
stream->close = NULL;
}
}
/**
* Read from file stream
*
* @param stream stream
* @param ptr buffer to read into
* @param size number of bytes to read
*
* @return number of bytes read
*/
static size_t lzsa_filestream_read(lzsa_stream_t *stream, void *ptr, size_t size) {
return fread(ptr, 1, size, (FILE*)stream->obj);
}
/**
* Write to file stream
*
* @param stream stream
* @param ptr buffer to write from
* @param size number of bytes to write
*
* @return number of bytes written
*/
static size_t lzsa_filestream_write(lzsa_stream_t *stream, void *ptr, size_t size) {
return fwrite(ptr, 1, size, (FILE*)stream->obj);
}
/**
* Check if file stream has reached the end of the data
*
* @param stream stream
*
* @return nonzero if the end of the data has been reached, 0 if there is more data
*/
static int lzsa_filestream_eof(lzsa_stream_t *stream) {
return feof((FILE*)stream->obj);
}
/**
* Open file and create an I/O stream from it
*
* @param stream stream to fill out
* @param pszInFilename filename
* @param pszMode open mode, as with fopen()
*
* @return 0 for success, nonzero for failure
*/
int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode) {
stream->obj = (void*)fopen(pszInFilename, pszMode);
if (stream->obj) {
stream->read = lzsa_filestream_read;
stream->write = lzsa_filestream_write;
stream->eof = lzsa_filestream_eof;
stream->close = lzsa_filestream_close;
return 0;
}
else
return -1;
}
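The file-backed stream above simply routes the lzsa_stream_t function pointers to stdio. The sketch below is an illustration added here, not code from the changeset, showing how a caller can drive that interface directly; the compressor consumes streams in essentially the same way.

#include "stream.h"

/* Illustrative: copy pszSrc to pszDst through the lzsa_stream_t interface
   implemented above. Returns 0 on success, -1 on failure. */
int copy_file_example(const char *pszSrc, const char *pszDst) {
   lzsa_stream_t in, out;
   unsigned char buffer[4096];
   int nResult = 0;

   if (lzsa_filestream_open(&in, pszSrc, "rb") != 0)
      return -1;
   if (lzsa_filestream_open(&out, pszDst, "wb") != 0) {
      in.close(&in);
      return -1;
   }
   while (!in.eof(&in)) {
      size_t nRead = in.read(&in, buffer, sizeof(buffer));
      if (nRead > 0 && out.write(&out, buffer, nRead) != nRead) {
         nResult = -1;
         break;
      }
   }
   out.close(&out);
   in.close(&in);
   return nResult;
}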

103
src/stream.h Normal file

@@ -0,0 +1,103 @@
/*
* stream.h - streaming I/O definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _STREAM_H
#define _STREAM_H
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declaration */
typedef struct _lzsa_stream_t lzsa_stream_t;
/* I/O stream */
typedef struct _lzsa_stream_t {
/** Opaque stream-specific pointer */
void *obj;
/**
* Read from stream
*
* @param stream stream
* @param ptr buffer to read into
* @param size number of bytes to read
*
* @return number of bytes read
*/
size_t(*read)(lzsa_stream_t *stream, void *ptr, size_t size);
/**
* Write to stream
*
* @param stream stream
* @param ptr buffer to write from
* @param size number of bytes to write
*
* @return number of bytes written
*/
size_t(*write)(lzsa_stream_t *stream, void *ptr, size_t size);
/**
* Check if stream has reached the end of the data
*
* @param stream stream
*
* @return nonzero if the end of the data has been reached, 0 if there is more data
*/
int(*eof)(lzsa_stream_t *stream);
/**
* Close stream
*
* @param stream stream
*/
void(*close)(lzsa_stream_t *stream);
} lzsa_stream_t;
/**
* Open file and create an I/O stream from it
*
* @param stream stream to fill out
* @param pszInFilename filename
* @param pszMode open mode, as with fopen()
*
* @return 0 for success, nonzero for failure
*/
int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode);
#ifdef __cplusplus
}
#endif
#endif /* _STREAM_H */
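Because lzsa_stream_t is a plain table of function pointers, implementations other than lzsa_filestream_open() can be plugged in. The sketch below, added for illustration and not part of the changeset, defines a minimal read-only stream over a memory buffer; such a stream could, for instance, be passed as the input side of lzsa_compress_stream().

#include <string.h>
#include "stream.h"

/* Illustrative: state for a read-only, memory-backed stream. */
typedef struct {
   const unsigned char *data;
   size_t size;
   size_t pos;
} mem_stream_state;

static size_t mem_stream_read(lzsa_stream_t *stream, void *ptr, size_t size) {
   mem_stream_state *state = (mem_stream_state *)stream->obj;
   size_t avail = state->size - state->pos;
   if (size > avail) size = avail;
   memcpy(ptr, state->data + state->pos, size);
   state->pos += size;
   return size;
}

static size_t mem_stream_write(lzsa_stream_t *stream, void *ptr, size_t size) {
   (void)stream; (void)ptr; (void)size;
   return 0;   /* read-only stream: refuse writes */
}

static int mem_stream_eof(lzsa_stream_t *stream) {
   mem_stream_state *state = (mem_stream_state *)stream->obj;
   return state->pos >= state->size;
}

static void mem_stream_close(lzsa_stream_t *stream) {
   stream->obj = NULL;
}

/* Fill out an lzsa_stream_t over an existing buffer; 'state' must outlive the stream. */
void mem_stream_open(lzsa_stream_t *stream, mem_stream_state *state,
                     const unsigned char *data, size_t size) {
   state->data = data;
   state->size = size;
   state->pos = 0;
   stream->obj = state;
   stream->read = mem_stream_read;
   stream->write = mem_stream_write;
   stream->eof = mem_stream_eof;
   stream->close = mem_stream_close;
}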